Heatmaps of OR gene expression across single dimension positions annotated with features, examination of gene covariance and calculation of glomerulus symmetry
Load packages, functions and data
knitr::opts_chunk$set(warning=F)
#packages
library(heatmaply)
library(reshape2)
library(gtools)
library(patchwork)
library(plotly)
library(tidyverse)
library(cowplot)
library(uwot)
#functions
NormalizeMaxMin <- function(x) {
#x needs to be pseudocounted
(x-min(x))/(max(x)-min(x))
}
Geo_mean <- function(x, na.rm=TRUE) {
exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
}
#given raw TPM data, normalize values to between 0 and 1 (inclusive)
NormTPM <- function(df_tpm) {
x_vals <- df_tpm %>% select(-slice) %>% t()
x_vals <- x_vals + 0.25
x_norm <- matrix(nrow = nrow(x_vals), ncol = ncol(x_vals))
for (i in 1:nrow(x_vals)) {
if (min(x_vals[i,]) - max(x_vals[i,]) == 0) {
x_norm[i,] <- rep(0, length(x_vals[i,]))
} else {
x_norm[i,] <- NormalizeMaxMin(x_vals[i,])
}
} #endfor
colnames(x_norm) <- df_tpm$slice
ornames <- rownames(x_vals)
rownames(x_norm) <- ornames
x_norm
}
#given normalized data and a list with "gene" and "sortrank" column, sort the normalized data
#list must have ORs in "gene" column and rank in "sortrank" column
SortByList <- function(df_norm, list) {
df_tib <- as_tibble(df_norm)
df_tib$gene <- rownames(df_norm)
df_info <- left_join(df_tib, list, by = "gene") %>% arrange(sortrank)
genesortlist <- df_info$gene
df_matrix <- t(t(df_info %>% select(-gene, -sortrank)))
rownames(df_matrix) <- genesortlist
df_matrix
}
#given tpm data, apply factors based on kallisto quantified OMP values
Ompnormify <- function(df_tpm) {
soi <- df_tpm %>% select(-slice) %>% t()
soi <- soi + 0.25
for (i in 1:ncol(soi)) {
soi[,i] = soi[,i] / ompNorms[i]
} #endfor
soi_norm <- matrix(nrow = nrow(soi), ncol = ncol(soi))
for (i in 1:nrow(soi)) {
soi_norm[i,] <- NormalizeMaxMin(soi[i,])
} #endfor
colnames(soi_norm) <- df_tpm$slice
ornames <- rownames(soi)
rownames(soi_norm) <- ornames
soi_norm
}
#given tpm data, apply factors based on OB shape from MRI model
Voxnormify <- function(df_tpm, voxweights) {
soi <- df_tpm %>% select(-slice) %>% t()
soi <- soi + 0.25
for (i in 1:ncol(soi)) {
soi[,i] <- soi[,i] * voxweights$weight[i]
} #endfor
soi_norm <- matrix(nrow = nrow(soi), ncol = ncol(soi))
for (i in 1:nrow(soi)) {
soi_norm[i,] <- NormalizeMaxMin(soi[i,])
} #endfor
colnames(soi_norm) <- df_tpm$slice
ornames <- rownames(soi)
rownames(soi_norm) <- ornames
soi_norm
}
normNormDF4 <- function(df1, df2, df3, df4) {
df_merge <- matrix(nrow = nrow(df1), ncol = ncol(df1))
for (i in 1:ncol(df1)) {
df_merge[,i] <- df1[,i] + df2[,i] + df3[,i] + df4[,i]
}
#normalize merged normalized values
merge_vals <- df_merge
merge_norm <- matrix(nrow = nrow(merge_vals), ncol = ncol(merge_vals))
for (i in 1:nrow(merge_vals)) {
merge_norm[i,] <- NormalizeMaxMin(merge_vals[i,])
}
#name rows and columns
colnames(merge_norm) <- colnames(df1)
ornames <- rownames(df1)
rownames(merge_norm) <- ornames
return(merge_norm)
}
#need to learn how to do ... args
normNormDF3 <- function(df1, df2, df3) {
df_merge <- matrix(nrow = nrow(df1), ncol = ncol(df1))
for (i in 1:ncol(df1)) {
df_merge[,i] <- df1[,i] + df2[,i] + df3[,i]
}
#normalize merged normalized values
merge_vals <- df_merge
merge_norm <- matrix(nrow = nrow(merge_vals), ncol = ncol(merge_vals))
for (i in 1:nrow(merge_vals)) {
merge_norm[i,] <- NormalizeMaxMin(merge_vals[i,])
}
#name rows and columns
colnames(merge_norm) <- colnames(df1)
ornames <- rownames(df1)
rownames(merge_norm) <- ornames
return(merge_norm)
}
#given a set of normalized values, merge them into 1 df, renormalize, sort, return sortlist
Sorting_Factory4 <- function(df1, df2, df3, df4, method = "kzsort") {
merge_norm <- normNormDF4(df1, df2, df3, df4)
merge_bin <- merge_norm
ornames <- rownames(df1)
if (method == "kzsort") {
#sort based on position of maximum expression section and
#distance to the mean of positions for the three highest expression sections
#higher distances being placed further away, no management for ties.
#create a maxsec variable
#anything lower than 1 becomes 0
merge_bin[merge_bin < 1] <- 0
maxsec <- vector(mode = "numeric", length = nrow(merge_bin))
for (i in 1:nrow(merge_bin)) {
for (s in 1:ncol(merge_bin)) {
if (merge_bin[i,s] == 1) {
maxsec[i] <- s
}
}
}
#find avg position of top 3 sections
avgpos3 <- vector(mode = "numeric", length = nrow(merge_bin))
for (i in 1:nrow(merge_bin)) {
avgpos3[i] <- mean(which(min_rank(desc(merge_norm[i,])) <= 3))
}
max2avg <- vector(mode = "numeric", length = nrow(merge_bin))
for (i in 1:length(maxsec)) {
max2avg[i] <- abs(maxsec[i] - avgpos3[i])
}
sorttable <- tibble(gene = ornames, maxsec, avgpos3, max2avg) %>%
arrange(maxsec, max2avg) %>%
mutate(sortrank = 1:length(ornames)) %>%
select(gene, sortrank)
} else if (method == "posmean") {
#weighted avg based on python code for "find_1_glom"
#weighted_avg = sum(x*y for x,y in zip(indexes,vals))/sum(vals)
#zip(c(1,2,3), c("a","b","c")) = (1,"a"), (2,"b"), (3,"c")
secs <- c(1:23)
wavgs <- vector(mode = "numeric", length = nrow(merge_bin))
for (i in 1:nrow(merge_norm)) {
vals <- merge_norm[i,]
sumvals <- sum(vals)
prods <- vector(mode = "numeric", length = length(vals))
for (s in 1:length(vals)) {
prods[s] <- vals[s] * secs[s]
}
wavgs[i] <- sum(prods)/sumvals
}
sorttable <- tibble(gene = ornames, wavgs) %>%
arrange(wavgs) %>%
mutate(sortrank = 1:length(ornames)) %>%
select(gene, sortrank)
} else if (method == "123") {
#which secs are ranked 1,2,3
onesec <- vector(mode = "numeric", length = nrow(merge_norm))
for (i in 1:nrow(merge_norm)) {
onesec[i] <- which(min_rank(desc(merge_norm[i,])) == 2)
}
twosec <- vector(mode = "numeric", length = nrow(merge_norm))
for (i in 1:nrow(merge_norm)) {
twosec[i] <- which(min_rank(desc(merge_norm[i,])) == 2)
}
threesec <- vector(mode = "numeric", length = nrow(merge_norm))
for (i in 1:nrow(merge_norm)) {
threesec[i] <- which(min_rank(desc(merge_norm[i,])) == 3)
}
sorttable <- tibble(gene = ornames, onesec, twosec, threesec) %>%
arrange(onesec, twosec, threesec) %>%
mutate(sortrank = 1:length(ornames)) %>%
select(gene, sortrank)
}
return(sorttable)
}
#given a set of normalized values, merge them into 1 df, renormalize, sort, return sortlist
Sorting_Factory3 <- function(df1, df2, df3, method = "kzsort") {
merge_norm <- normNormDF3(df1, df2, df3)
merge_bin <- merge_norm
ornames <- rownames(df1)
if (method == "kzsort") {
#sort based on position of maximum expression section and
#distance to the mean of positions for the three highest expression sections
#higher distances being placed further away, no management for ties.
#create a maxsec variable
#anything lower than 1 becomes 0
merge_bin[merge_bin < 1] <- 0
maxsec <- vector(mode = "numeric", length = nrow(merge_bin))
for (i in 1:nrow(merge_bin)) {
for (s in 1:ncol(merge_bin)) {
if (merge_bin[i,s] == 1) {
maxsec[i] <- s
}
}
}
#find avg position of top 3 sections
avgpos3 <- vector(mode = "numeric", length = nrow(merge_bin))
for (i in 1:nrow(merge_bin)) {
avgpos3[i] <- mean(which(min_rank(desc(merge_norm[i,])) <= 3))
}
max2avg <- vector(mode = "numeric", length = nrow(merge_bin))
for (i in 1:length(maxsec)) {
max2avg[i] <- abs(maxsec[i] - avgpos3[i])
}
sorttable <- tibble(gene = ornames, maxsec, avgpos3, max2avg) %>%
arrange(maxsec, max2avg) %>%
mutate(sortrank = 1:length(ornames)) %>%
select(gene, sortrank)
} else if (method == "posmean") {
#weighted avg based on python code for "find_1_glom"
#weighted_avg = sum(x*y for x,y in zip(indexes,vals))/sum(vals)
#zip(c(1,2,3), c("a","b","c")) = (1,"a"), (2,"b"), (3,"c")
secs <- c(1:23)
wavgs <- vector(mode = "numeric", length = nrow(merge_bin))
for (i in 1:nrow(merge_norm)) {
vals <- merge_norm[i,]
sumvals <- sum(vals)
prods <- vector(mode = "numeric", length = length(vals))
for (s in 1:length(vals)) {
prods[s] <- vals[s] * secs[s]
}
wavgs[i] <- sum(prods)/sumvals
}
sorttable <- tibble(gene = ornames, wavgs) %>%
arrange(wavgs) %>%
mutate(sortrank = 1:length(ornames)) %>%
select(gene, sortrank)
} else if (method == "123") {
#which secs are ranked 1,2,3
onesec <- vector(mode = "numeric", length = nrow(merge_norm))
for (i in 1:nrow(merge_norm)) {
onesec[i] <- which(min_rank(desc(merge_norm[i,])) == 2)
}
twosec <- vector(mode = "numeric", length = nrow(merge_norm))
for (i in 1:nrow(merge_norm)) {
twosec[i] <- which(min_rank(desc(merge_norm[i,])) == 2)
}
threesec <- vector(mode = "numeric", length = nrow(merge_norm))
for (i in 1:nrow(merge_norm)) {
threesec[i] <- which(min_rank(desc(merge_norm[i,])) == 3)
}
sorttable <- tibble(gene = ornames, onesec, twosec, threesec) %>%
arrange(onesec, twosec, threesec) %>%
mutate(sortrank = 1:length(ornames)) %>%
select(gene, sortrank)
}
return(sorttable)
}
#given a normalized matrix, output old style Black(0) to Red(1) heatmaps
MakeBlackRedHeatmap <- function(df_norm, title = NA) {
df_df <- as.data.frame(df_norm)
df_df$target = rownames(df_df)
df_melt <- melt(df_df, id.vars ="target")
names(df_melt)[2:3] <- c("section","normTPM")
#make genelist into character vector
df_melt$target <- as.character(df_melt$target)
df_melt$target <- factor(df_melt$target, levels=unique(df_melt$target))
if (is.na(title)) {
#HEAT DA MAP generic
plot <- ggplot(df_melt, aes(section, target, height = 3)) +
geom_tile(aes(fill = normTPM), color = "grey") +
scale_fill_gradient2(low = "black", mid = "red", high = "red3", midpoint = 0.65) +
ylab("Olfr Genes") +
theme(legend.title = element_text(size = 11, vjust = 1),
legend.text = element_text(size = 7),
legend.position = "none", #none if you dont want expression level legend
plot.title = element_text(size = 24),
axis.title.x = element_text(size = 0, vjust = 50, face="bold"),
axis.title.y = element_text(size = 0, vjust = 0, face="bold"),
axis.text.x = element_text(angle = 330, hjust = 0, vjust = 2, size = 0),
axis.text.y = element_text(size = 0),
axis.ticks = element_blank()) +
labs(fill = "Expression Level")
return(plot)
} else {
#HEAT DA MAP generic
plot <- ggplot(df_melt, aes(section, target, height = 3)) +
geom_tile(aes(fill = normTPM), color = "grey") +
scale_fill_gradient2(low = "black", mid = "red", high = "red3", midpoint = 0.65) +
ylab("Olfr Genes") +
theme(legend.title = element_text(size = 11, vjust = 1),
legend.text = element_text(size = 7),
legend.position = "none", #none if you dont want expression level legend
plot.title = element_text(size = 24),
axis.title.x = element_text(size = 0, vjust = 50, face="bold"),
axis.title.y = element_text(size = 0, vjust = 0, face="bold"),
axis.text.x = element_text(angle = 330, hjust = 0, vjust = 2, size = 0),
axis.text.y = element_text(size = 0),
axis.ticks = element_blank()) +
labs(fill = "Expression Level") +
ggtitle(title)
return(plot)
}
}
#Make Black Red Heatmaps
MakeBLRHtree3 <- function(df1, df2, df3, tree, title = NA) {
treeset1 <- df1[which(rownames(df1) %in% tree),]
treeset2 <- df2[which(rownames(df2) %in% tree),]
treeset3 <- df3[which(rownames(df3) %in% tree),]
treesort <- Sorting_Factory3(treeset1, treeset2, treeset3, method = "posmean")
merge <- normNormDF3(treeset1, treeset2, treeset3)
mergesort <- SortByList(merge, treesort)
p <- MakeBlackRedHeatmap(mergesort, title)
return(p)
}
MakeBLRHtree4 <- function(df1, df2, df3, df4, tree, title = NA, out = "heat") {
treeset1 <- df1[which(rownames(df1) %in% tree),]
treeset2 <- df2[which(rownames(df2) %in% tree),]
treeset3 <- df3[which(rownames(df3) %in% tree),]
treeset4 <- df4[which(rownames(df4) %in% tree),]
treesort <- Sorting_Factory4(treeset1, treeset2, treeset3, treeset4, method = "posmean")
merge <- normNormDF4(treeset1, treeset2, treeset3, treeset4)
mergesort <- SortByList(merge, treesort)
p <- MakeBlackRedHeatmap(mergesort, title)
if (out == "heat") {
return(p)
} else if (out == "names") {
return(treesort)
}
}
#given 2 matrixes, compute spearman correlation of rows (OR vs OR)
Spear_mtx <- function(df1, df2) {
spear_vec <- vector(length = nrow(df1), mode = "numeric")
for (i in 1:nrow(df1)) {
spear_vec[i] <- cor(df1[i,], df2[i,], method = "spearman")
}
return(spear_vec)
}
#apply Spear_mtx to multiple matrices(just 3 or 4)
#probably need to rewrite using permutation/combination
Multi_Spear_mtx <- function(...){
arguments <- list(...)
many <- length(arguments)
if (many == 3) {
mat3 <- matrix(ncol = 3, nrow = nrow(arguments[[1]]))
colnames(mat3) <- c("1v2", "1v3", "2v3")
ornames <- rownames(arguments[[1]])
rownames(mat3) <- ornames
mat3[,1] <- Spear_mtx(arguments[[1]], arguments[[2]])
mat3[,2] <- Spear_mtx(arguments[[1]], arguments[[3]])
mat3[,3] <- Spear_mtx(arguments[[2]], arguments[[3]])
mat3
} else if(many == 4) {
mat4 <- matrix(ncol = 6, nrow = nrow(arguments[[1]]))
colnames(mat4) <- c("1v2", "1v3", "1v4", "2v3", "2v4", "3v4")
ornames <- rownames(arguments[[1]])
rownames(mat4) <- ornames
mat4[,1] <- Spear_mtx(arguments[[1]], arguments[[2]])
mat4[,2] <- Spear_mtx(arguments[[1]], arguments[[3]])
mat4[,3] <- Spear_mtx(arguments[[1]], arguments[[4]])
mat4[,4] <- Spear_mtx(arguments[[2]], arguments[[3]])
mat4[,5] <- Spear_mtx(arguments[[2]], arguments[[4]])
mat4[,6] <- Spear_mtx(arguments[[3]], arguments[[4]])
mat4
}
}
#Find an Anterior Peak and a Posterior Peak
#note that the rank df solves ties using "random" in order to prevent passing a vector when searching for rank X.
AntPeakPostPeak <- function(df_norm) {
#rank df
rank_df <- matrix(nrow = nrow(df_norm), ncol = ncol(df_norm))
for (i in 1:nrow(df_norm)) {
rank_df[i,] <- rank(desc(df_norm[i,]), ties.method = "random")
}
#find max sec
maxsec <- vector(mode = "numeric", length = nrow(rank_df))
maxval <- vector(mode = "numeric", length = nrow(rank_df))
for (i in 1:nrow(rank_df)) {
for (s in 1:ncol(rank_df)) {
if (rank_df[i,s] == min(rank_df[i,])) {
maxsec[i] <- s
maxval[i] <- df_norm[i,s]
}
}
}
#find next
nextsec <- rep(NA, length(maxsec))
nextval <- vector(mode = "numeric", length = nrow(rank_df))
nextmany <- vector(mode = "numeric", length = nrow(rank_df))
maxsecin <- ncol(rank_df)
for (i in 1:length(maxsec)) {
#set findrank
findrank <- 2
#set neighbors
if (maxsec[i] == 1) {
min_neigh <- 1
max_neigh <- 2
} else if (maxsec[i] == maxsecin) {
min_neigh <- maxsecin - 1
max_neigh <- maxsecin
} else {
min_neigh <- maxsec[i] - 1
max_neigh <- maxsec[i] + 1
}
#while we have not found nextsec
while (is.na(nextsec[i])) {
potentialsec <- which(rank_df[i,] == findrank)
#if potentialsec is a neighbor, bump findrank and neighbor
if (between(potentialsec, min_neigh, max_neigh)) {
findrank <- findrank + 1
#bump neighbor
if (potentialsec == min_neigh) {
min_neigh <- min_neigh - 1
} else if (potentialsec == max_neigh) {
max_neigh <- max_neigh + 1
}
} else if (between(potentialsec, min_neigh, max_neigh) == FALSE) {
nextsec[i] <- potentialsec
nextval[i] <- df_norm[i, potentialsec]
} else {
nextsec[i] <- -1
nextval[i] <- -1
}
}
}
#notate Ant and Post Peaks
antpeak <- vector(mode = "numeric", length = nrow(rank_df))
postpeak <- vector(mode = "numeric", length = nrow(rank_df))
antval <- vector(mode = "numeric", length = nrow(rank_df))
postval <- vector(mode = "numeric", length = nrow(rank_df))
for (i in 1:nrow(rank_df)) {
if (maxsec[i] < nextsec[i]) {
antpeak[i] <- maxsec[i]
antval[i] <- maxval[i]
postpeak[i] <- nextsec[i]
postval[i] <- nextval[i]
} else {
antpeak[i] <- nextsec[i]
antval[i] <- nextval[i]
postpeak[i] <- maxsec[i]
postval[i] <- maxval[i]
}
}
#output
gene <- rownames(df_norm)
record <- tibble(gene, antpeak, postpeak, antval, postval)
return(record)
}
#join APPP output, names should be like "_mX"
MultiAPPP <- function(df1, df2, df3, df4, name1, name2, name3, name4) {
rec1 <- AntPeakPostPeak(df1)
rec2 <- AntPeakPostPeak(df2)
rec3 <- AntPeakPostPeak(df3)
rec4 <- AntPeakPostPeak(df4)
join1 <- left_join(rec1, rec2, by = "gene", suffix = c(name1, name2))
join2 <- left_join(rec3, rec4, by = "gene", suffix = c(name3, name4))
endjoin <- left_join(join1, join2, by = "gene")
return(endjoin)
}
#using APPP, take a mean position of peaks
SymLiner <- function(ap, vd, ml, out = "plot") {
ap_appp <- AntPeakPostPeak(ap)
vd_appp <- AntPeakPostPeak(vd) %>%
rename(venpeak = antpeak,
dorpeak = postpeak,
venval = antval,
dorval = postval)
ml_appp <- AntPeakPostPeak(ml) %>%
rename(medpeak = antpeak,
latpeak = postpeak,
medval = antval,
latval = postval)
allpeaks <- left_join(ap_appp, vd_appp, by = "gene") %>% left_join(ml_appp, by = "gene")
peakdifs <- allpeaks %>%
rowwise() %>%
mutate(apdif = mean(c(antpeak, postpeak)),
vddif = mean(c(venpeak, dorpeak)),
mldif = mean(c(medpeak, latpeak))) %>%
ungroup()
if (out == "plot") {
plot <- ggplot(peakdifs) +
geom_point(aes(apdif, mldif, alpha = 0.1)) +
geom_smooth(aes(apdif, mldif), method = "lm", formula = y~x) +
theme(legend.position = "none")
return(plot)
} else if (out == "fit") {
fit <- coef(lm(peakdifs$vddif ~ peakdifs$apdif))
fitout <- summary(fit)
return(fitout)
} else {
return(peakdifs)
}
}
#Make triple heatmap of class, tanzone, and matsunami diff OE
PlotFeatures <- function(sortlistin, title = "AP Features", featureOut = "tri") {
withinfo <- sortlistin %>%
left_join(info %>% rename("gene" = "olfrname"), by = "gene") %>%
select(gene:fisurface) %>%
mutate(sortearly = ifelse(sortrank <= max(sortrank)/2, "early", "late"),
class_fct = as_factor(class))
stats_oe <- withinfo %>%
filter(!is.na(oe_region)) %>% select(sortearly, oe_region) %>%
group_by(sortearly) %>% count(oe_region) %>%
pivot_wider(names_from = sortearly, values_from = n) %>% as.matrix
stats_oe <- stats_oe[,-1]
class(stats_oe) <- "numeric"
test_oe <- fisher.test(stats_oe)
pval_oe <- format(test_oe$p.value, digits = 5)
stats_class <- withinfo %>%
filter(!is.na(class)) %>% select(sortearly, class) %>%
group_by(sortearly) %>% count(class) %>%
pivot_wider(names_from = sortearly, values_from = n) %>% as.matrix
stats_class <- stats_class[,-1]
class(stats_class) <- "numeric"
test_class <- fisher.test(stats_class)
pval_class <- format(test_class$p.value, digits = 5)
stats_fi <- withinfo %>%
filter(!is.na(fisurface)) %>% select(sortearly, fisurface) %>%
group_by(sortearly) %>% count(fisurface) %>%
pivot_wider(names_from = sortearly, values_from = n) %>% as.matrix
stats_fi <- stats_fi[,-1]
class(stats_fi) <- "numeric"
test_fi <- fisher.test(stats_fi)
pval_fi <- format(test_fi$p.value, digits = 5)
sorttanzone <- withinfo %>%
mutate(tzsimnorm = ifelse(tzsimple > 5, NA, tzsimple)) %>%
select(gene, tzsimnorm) %>%
filter(!is.na(tzsimnorm)) %>% as.matrix()
rownames(sorttanzone) <- sorttanzone[,1]
trimtanzone <- sorttanzone[,2]
trimtanzone <- as.numeric(trimtanzone)
if (is.na(title)) {
ylabname <- ""
} else if (title == "AP Features") {
ylabname <- "Anterior <<< >>> Posterior"
} else if (title == "VD Features") {
ylabname <- "Ventral <<< >>> Dorsal"
} else if (title == "ML Features") {
ylabname <- "Medial <<< >>> Lateral"
}
df_df <- as.data.frame(trimtanzone)
df_df$target = rownames(df_df)
df_melt <- melt(df_df, id.vars ="target")
names(df_melt)[2:3] <- c("Feature", "value")
#make genelist into character vector
df_melt$target <- as.character(df_melt$target)
df_melt$target <- factor(df_melt$target, levels=unique(df_melt$target))
tan <- ggplot(df_melt, aes(Feature, target, height = 3)) +
geom_tile(aes(fill = value), color = "grey") +
scale_fill_viridis() +
ylab(ylabname) +
guides(fill = guide_legend(nrow = 1)) +
theme(plot.margin = unit(c(1,1,1,1), "cm"),
legend.title = element_text(size = 12, vjust = 1),
legend.text = element_text(size = 6),
legend.position = c(0.5, -0.1),
plot.title = element_text(size = 20),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 15, vjust = 0, face="bold"),
axis.text.x = element_text(angle = 330, hjust = 0, vjust = 2, size = 0),
axis.text.y = element_text(size = 0),
axis.ticks = element_blank()) +
labs(fill = "MiyIndex") +
ggtitle(title)
oe <- ggplot(withinfo %>% filter(!is.na(oe_region)),
aes(sortrank, oe_region, fill = oe_region)) +
geom_violin() +
coord_flip() +
ylab(paste("Matsunami OE DiffE", paste("p = ", pval_oe, sep=""), sep = "\n")) +
scale_fill_manual(values=c("red", "black")) +
theme_cowplot() +
theme(legend.position = "none",
axis.line = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank())
class <- ggplot(withinfo %>% filter(!is.na(class_fct)),
aes(sortrank, class_fct, fill = class_fct)) +
geom_violin() +
coord_flip() +
ylab(paste("Class", paste("p = ", pval_class, sep=""), sep = "\n")) +
scale_fill_manual(values=c("red", "black")) +
theme_cowplot() +
theme(legend.position = "none",
axis.line = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank())
FIsurface <- ggplot(withinfo %>% filter(!is.na(fisurface)),
aes(sortrank, fisurface, fill = fisurface)) +
geom_violin() +
coord_flip() +
ylab(paste("FI surface enriched", paste("p = ", pval_fi, sep=""), sep = "\n")) +
scale_fill_manual(values=c("red", "black")) +
theme_cowplot() +
theme(legend.position = "none",
axis.line = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank())
if (featureOut == "tri") {
tri <- tan + oe + class
return(tri)
} else if (featureOut == "tan") {
return(tan)
} else if (featureOut == "oe") {
return(oe + ggtitle(ylabname))
} else if (featureOut == "class") {
return(class + ggtitle(ylabname))
} else if (featureOut == "fi") {
return(FIsurface + ggtitle(ylabname))
} #else if
} #end function
#function for distance between 3d points
Dist3d <- function(df, row1, row2) {
distance <- sqrt((df$AntPos[row1] - df$AntPos[row2])^2 +
(df$MedLat[row1] - df$MedLat[row2])^2 +
(df$VenDor[row1] - df$VenDor[row2])^2)
return(distance)
} #end function
#data for correlation matrix split into X many tree groups
PlotBranches <- function(branches) {
dend <- hclust(dist(meanpos_cor, method = "euclidean"), method = "complete")
#2d stuff
branchheatmaps <- vector("list", length = branches)
branchsize <- vector("numeric", length = branches)
branchcor <- vector("numeric", length = branches)
#3d stuff
branch3d <- vector("list", length = branches)
branchMdist <- vector("list", length = branches)
branchLdist <- vector("list", length = branches)
branchavgdist <- vector("numeric", length = branches)
for (i in 1:branches) {
if (i %% 50 == 0) {
message(i)
}
names <- names(which(cutree(dend, k = branches) == i))
#remove ectopic
ectopic <- c("Olfr32", "Olfr287")
#checking for 1 at a time due to warning: the condition has length > 1 and only first element will be used
if (max(ectopic %in% names) == 1) {
names <- names[-which(names %in% ectopic)]
} #endif
branchsize[i] <- length(names)
if(branchsize[i] > 1){
title_ap <- paste("AP-branch", as.character(i), sep = "")
title_ml <- paste("ML-branch", as.character(i), sep = "")
title_vd <- paste("VD-branch", as.character(i), sep = "")
ap_plot <- MakeBLRHtree4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm,
names, title = title_ap)
ml_plot <- MakeBLRHtree3(m8_Vnorm, m11_Vnorm, m16_Vnorm,
names, title = title_ml)
vd_plot <- MakeBLRHtree3(m9_Vnorm, m12_Vnorm, m14_Vnorm,
names, title = title_vd)
branchheatmaps[[i]] <- ap_plot + ml_plot + vd_plot
#branchcor mean includes 1.0 values from ORx vs ORx
branchcor[i] <- mean(meanpos_cor[which(rownames(meanpos_cor) %in% names),
which(colnames(meanpos_cor) %in% names)])
branch3d[[i]] <- ListML(names, "point")
#calculate distance between points for ORs in group
data4dist <- ListDorML(names, chooseOut = "data") %>%
filter(p50 == clustmaxp)
dataMdist <- data4dist %>%
filter(side == "Medial") %>%
unique()
dataLdist <- data4dist %>%
filter(side == "Lateral") %>%
unique()
permM <- permutations(n=nrow(dataMdist), r=2)
permL <- permutations(n=nrow(dataLdist), r=2)
someM <- vector("numeric", length = nrow(permM))
someL <- vector("numeric", length = nrow(permL))
for (m in 1:nrow(permM)) {
someM[m] <- Dist3d(dataMdist, permM[m,1], permM[m,2])
}
for (l in 1:nrow(permL)) {
someL[l] <- Dist3d(dataLdist, permL[l,1], permL[l,2])
}
branchMdist[[i]] <- someM
branchLdist[[i]] <- someL
branchavgdist[i] <- mean(c(someM, someL))
} else {
branchcor[i] <- NA
branchheatmaps[[i]] <- NA
branchMdist[[i]] <- NA
branchLdist[[i]] <- NA
branchavgdist[i] <- NA
branch3d[[i]] <- DorsalML(olfr = names)
}#endif
}#endfor
branchout <- list(branchsize, branchcor, branchheatmaps,
branchMdist, branchLdist, branchavgdist, branch3d)
names(branchout) <- c("size", "cor", "heatmaps", "med_dist", "lat_dist", "avg_dist", "3D")
return(branchout)
} #end function
# 3d interactive scatterplots with plotly
#given an OR, pick a number of high probability voxels based on signal to noise ratios
Scat_rank <- function(olfr, topX = 72, chooseOut = "plot", title = NA) {
reranked <- ranked %>%
filter(olfrname == olfr) %>%
mutate(rankofrank = min_rank(voxRankSNR),
rankcol = min_rank(desc(ifelse(rankofrank <= topX, rankofrank, NA))),
isRanked = is.na(rankcol))
besties <- reranked %>% filter(isRanked == 0) %>% arrange(voxRankSNR)
worsties <- reranked %>% filter(isRanked == 1)
all <- bind_rows(besties, worsties)
if (chooseOut == "data") {
return(all)
} else {
p <- plot_ly(type = "scatter3d", mode = "markers") %>%
add_trace(data=worsties, x=~AntPos, y=~MedLat, z=~VenDor,
color=~rankcol, opacity=0.15,
text = ~paste('Gene:', olfrname,
'<br>voxRankSNR:', voxRankSNR,
'<br>voxSNRdim', voxSNRdim),
marker = list(size = 6)) %>%
add_trace(data=besties, x=~AntPos, y=~MedLat, z=~VenDor, color=~rankcol,
text = ~paste('Gene:', olfrname,
'<br>voxRankSNR:', voxRankSNR,
'<br>voxSNRdim', voxSNRdim),
marker = list(size = 6, line = list(color = 'black', width = 0.5))) %>%
layout(title = title,
scene = list(xaxis = list(title = 'Anterior-Posterior'),
yaxis = list(title = 'Medial-Lateral'),
zaxis = list(title = 'Ventral-Dorsal')))
return(p)
} #endif
} #endfunction
#plot p50 values for an olfrs top X voxels
p50plot <- function(olfr, rankx = 50, title = NA) {
bestp50 <- ranked %>%
filter(olfrname == olfr) %>%
mutate(rankp1 = min_rank(p50), rankp = min_rank(desc(rankp1))) %>% filter(rankp <= rankx)
worstp50 <- ranked %>%
filter(olfrname == olfr) %>% mutate(rankp = min_rank(p50)) %>%
filter(rankp > rankx) %>% mutate(rankna = NA)
plot_ly(type = "scatter3d", mode = "markers") %>%
add_trace(data=worstp50, x=~AntPos, y=~MedLat, z=~VenDor,
color='rgb(10,10,10)', opacity=0.1,
text = ~paste('Gene:', olfr,
'<br>voxRankSNR:', voxRankSNR,
'<br>voxSNRdim', voxSNRdim),
marker = list(size = 6)) %>%
add_trace(data=bestp50, x=~AntPos, y=~MedLat, z=~VenDor, color=~rankp,
text = ~paste('Gene:', olfr,
'<br>voxRankSNR:', voxRankSNR,
'<br>voxSNRdim', voxSNRdim),
marker = list(size = 6, line = list(color = 'black', width = 0.5))) %>%
layout(title = title,
scene = list(xaxis = list(title = 'Anterior-Posterior'),
yaxis = list(title = 'Medial-Lateral'),
zaxis = list(title = 'Ventral-Dorsal')))
} #endfunction
#define clusters of best X p50 points using a pairwise matrix and ability to output plots and data
#given a number of top ranking positions for an OR, cluster the points based on spatial position
Cluster <- function (olfr, topX = 72, minClustSize = 5, chooseOut = "plot", title = NA) {
df <- ranked %>%
filter(olfrname == olfr) %>%
mutate(rankofrank = min_rank(desc(min_rank(p50)))) %>%
filter(rankofrank <= topX) %>%
arrange(desc(p50))
cut <- ranked %>%
filter(olfrname == olfr) %>%
mutate(rankofrank = min_rank(desc(min_rank(p50)))) %>%
filter(rankofrank > topX) %>%
arrange(desc(p50))
#make pairwise of matrixes
dmatrix <- matrix(data = 0, nrow = nrow(df), ncol = nrow(df))
for (i in 1:nrow(df)) {
distances <- vector("numeric", length = nrow(df))
toprankvox <- df$voxel[i]
ap <- df$AntPos[i]
ml <- df$MedLat[i]
vd <- df$VenDor[i]
for (j in 1:nrow(df)) {
if (i == j) {
distances[j] <- 0
} else {
distances[j] <- sqrt((df$AntPos[i] - df$AntPos[j])^2 +
(df$MedLat[i] - df$MedLat[j])^2 +
(df$VenDor[i] - df$VenDor[j])^2)
} #endif
} #endforj
neighbors <- which(distances <= sqrt(3))
dmatrix[neighbors, i] <- 1
} #endfori
#cluster matrix
cluster_list <- rep(NA, nrow(df))
for (k in 1:ncol(dmatrix)) {
#skip points that are already in a cluster
if (is.na(cluster_list[k])) {
round <- 1
#do a bunch of rounds, find a way to have it run until it stops finding new points
while (round < topX/3) {
neigh <- which(dmatrix[,k] == 1)
#for each neighbor of previous round of neighbors, find new neighbors
for (l in 1:length(neigh)) {
neigh <- c(neigh, which(dmatrix[,neigh[l]] == 1))
neigh <- neigh[-which(duplicated(neigh))]
neigh <- sort(neigh)
} #endforl
round <- round + 1
} #endwhile
cluster_list[neigh] <- k
} else {
next
} #endif
} #endfork
df_out <- df %>% mutate(rawclust = cluster_list) %>%
group_by(rawclust) %>%
mutate(clustmaxp = max(p50),
clustminp = min(p50),
clustmeanp = mean(p50)) %>%
add_tally() %>%
ungroup() %>%
mutate(clustmaxprank = dense_rank(desc(clustmaxp)),
clustmeanprank = dense_rank(desc(clustmeanp)),
clustsizerank = dense_rank(desc(n))) %>%
arrange(clustsizerank) %>%
mutate(isCluster = ifelse(n >= minClustSize, 1, 0),
clust_unique = dense_rank(desc(clustmaxp * isCluster))) %>%
select(p2.5:ORrankpervox, rankofrank, rawclust:clustsizerank, clust_unique, isCluster)
df_clustered <- df_out %>% filter(isCluster == 1)
too_small <- df_out %>% filter(isCluster == 0.5)
cut_out <- cut %>% mutate(rawclust = NA,
clustmaxp = NA,
clustminp = NA,
clustmeanp = NA,
n = NA,
clustmeanprank = NA,
clustmaxprank = NA,
clustsizerank = NA,
isCluster = 0,
clust_unique = NA) %>%
select(p2.5:ORrankpervox,
rankofrank,
rawclust:clustsizerank,
clust_unique,
isCluster) %>%
bind_rows(too_small)
all_out <- bind_rows(df_clustered, cut_out)
#return various things 'rgb(10,10,10)'
if (chooseOut == "data") {
return(all_out)
} else {
p <- plot_ly(type = "scatter3d", mode = "markers") %>%
add_trace(data=cut_out, x=~AntPos, y=~MedLat, z=~VenDor,
color="shell", opacity=0.15,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_p50:', clustmeanp,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique),
marker = list(size = 6)) %>%
add_trace(data=df_out, x=~AntPos, y=~MedLat, z=~VenDor, color=~clust_unique,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_p50:', clustmeanp,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique),
marker = list(size = 6, line = list(color = 'black', width = 0.5))) %>%
layout(title = title,
scene = list(xaxis = list(title = 'Anterior-Posterior'),
yaxis = list(title = 'Medial-Lateral'),
zaxis = list(title = 'Ventral-Dorsal')))
return(p)
} #endif
} #endfunction
#run cluster given an incrementing number of voxels to cluster from
#needs to also output some sort of summary statistic to define the optimal number of initial voxels that returns the "best" clusters
BestML <- function(olfr, topMin = 50, topMax = 200, topBy = 25, minSize = 5,
clustersPerHalfBulb = 1, chooseOut = "plot", title = NA) {
cphb <- 0
topStep <- topMin
while (cphb < clustersPerHalfBulb) {
df_in <- Cluster(olfr, topX = topStep, minClustSize = minSize, chooseOut = "data")
df_ml <- df_in %>%
filter(isCluster == 1) %>%
group_by(clust_unique) %>%
mutate(meanML = mean(MedLat),
meanAP = mean(AntPos),
meanVD = mean(VenDor),
side = ifelse(meanML >= symline$mlvals[which(symline$apvals == round(meanAP))],
"Lateral", "Medial")) %>%
ungroup()
df_bestM <- df_ml %>%
filter(side == "Medial") %>%
mutate(MedRank = dense_rank(clust_unique),
LatRank = NA,
TopStep = topStep,
sideRank = MedRank) %>%
filter(MedRank <= clustersPerHalfBulb)
df_bestL <- df_ml %>%
filter(side == "Lateral") %>%
mutate(MedRank = NA,
LatRank = dense_rank(clust_unique),
TopStep = topStep,
sideRank = LatRank) %>%
filter(LatRank <= clustersPerHalfBulb)
clust_bestM <- unique(df_bestM$clust_unique)
clust_bestL <- unique(df_bestL$clust_unique)
#checks and counters
cphb <- min(length(clust_bestM), length(clust_bestL))
topStep <- topStep + topBy
} #endwhile
clust_bestML <- c(clust_bestM, clust_bestL)
which_bestML <- df_in[-which(df_ml$clust_unique %in% clust_bestML),]
df_notbest <- df_in %>%
mutate(sideRank = NA) %>%
select(-clust_unique) %>%
mutate(clust_unique = NA)
df_best <- bind_rows(df_bestM, df_bestL)
df_all <- bind_rows(df_best, df_notbest)
#output
if (chooseOut == "data") {
return(df_all)
} else if (chooseOut == "best") {
return(df_best)
} else if (chooseOut == "notbest") {
return(df_notbest)
} else {
p <- plot_ly(type = "scatter3d", mode = "markers") %>%
add_trace(data=df_notbest, x=~AntPos, y=~MedLat, z=~VenDor,
color="shell", opacity=0.15,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_p50:', clustmeanp,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique),
marker = list(size = 6)) %>%
add_trace(data=df_best, x=~AntPos, y=~MedLat, z=~VenDor, color=~clust_unique,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_ML:', meanML,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique,
'<br>SideRank:', sideRank),
marker = list(size = 6, line = list(color = 'black', width = 0.5))) %>%
layout(title = title,
scene = list(xaxis = list(title = 'Anterior-Posterior'),
yaxis = list(title = 'Medial-Lateral'),
zaxis = list(title = 'Ventral-Dorsal')))
return(p)
} #endif
} #endfunction
#given a list of Olfr names, output 1 medial and 1 lateral cluster for each name
#perhaps add an if or arg for really big lists to run a reduced step
ListML <- function(x, chooseOut = "plot", title = NA) {
#use a list to build a df of unknown size instead of bind_row each iteration
list_out <- vector("list", length = length(x))
for (i in 1:length(x)) {
list_out[[i]] <- BestML(x[i], topMin = 50, topMax = 150, topBy = 50,
minSize = 5, clustersPerHalfBulb = 1, chooseOut = "best")
} #endfor
#always need notbest for the shape shell1
notbest <- BestML(x[1], topMin = 100, topMax = 150, topBy = 50, minSize = 5,
clustersPerHalfBulb = 1, chooseOut = "notbest")
df_out <- bind_rows(list_out)
#output
if (chooseOut == "data") {
return(df_out)
} else if (chooseOut == "point") {
df_point <- df_out %>% filter(p50 == clustmaxp)
p <- plot_ly(type = "scatter3d", mode = "markers") %>%
add_trace(data=notbest, x=~AntPos, y=~MedLat, z=~VenDor,
color="shell", opacity=0.15,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_p50:', clustmeanp,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique),
marker = list(size = 6)) %>%
add_trace(data=df_point, x=~AntPos, y=~MedLat, z=~VenDor, color=~olfrname,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_ML:', meanML,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique,
'<br>SideRank:', sideRank),
marker = list(size = 6, line = list(color = 'black', width = 0.5))) %>%
layout(title = title,
scene = list(xaxis = list(title = 'Anterior-Posterior'),
yaxis = list(title = 'Medial-Lateral'),
zaxis = list(title = 'Ventral-Dorsal')))
return(p)
} else {
p <- plot_ly(type = "scatter3d", mode = "markers") %>%
add_trace(data=notbest, x=~AntPos, y=~MedLat, z=~VenDor,
color="shell", opacity=0.15,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_p50:', clustmeanp,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique),
marker = list(size = 6)) %>%
add_trace(data=df_out, x=~AntPos, y=~MedLat, z=~VenDor, color=~olfrname,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_ML:', meanML,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique,
'<br>SideRank:', sideRank),
marker = list(size = 6, line = list(color = 'black', width = 0.5))) %>%
layout(title = title,
scene = list(xaxis = list(title = 'Anterior-Posterior'),
yaxis = list(title = 'Medial-Lateral'),
zaxis = list(title = 'Ventral-Dorsal')))
return(p)
} #endif
} #endfunction
#run Cluster and check if either top ranked M or L glom is dorsal and has known dorsal expression or is class 1. If True, find a dorsal glom for both M and L
DorsalML <- function(olfr, topMin = 100, topBy = 100, minSize = 3,
clustIn = 5, clustOut = 1, chooseOut = "plot") {
print(olfr)
clustFound <- 0
topStep <- topMin
while (clustFound != clustOut) {
df_in <- Cluster(olfr, topX = topStep, minClustSize = minSize, chooseOut = "data")
df_ml <- df_in %>%
filter(isCluster == 1) %>%
group_by(clust_unique) %>%
mutate(meanML = mean(MedLat),
meanAP = mean(AntPos),
meanVD = mean(VenDor)) %>%
ungroup() %>%
rowwise() %>%
mutate(side = ifelse(meanML >= symline$mlvals[which(symline$apvals == round(meanAP))],
"Lateral", "Medial")) %>%
ungroup() %>%
left_join(info, by = "olfrname") %>%
rowwise() %>%
mutate(dorsalRating = sum(ifelse(class == 1, 2, 0),
ifelse(tzsimple < 2, 1, 0),
ifelse(oe_region == "Dorsal", 1, 0),
na.rm = T)) %>%
ungroup()
df_bestM <- df_ml %>%
filter(side == "Medial") %>%
mutate(MedRank = dense_rank(clust_unique),
LatRank = NA,
TopStep = topStep,
minSize = minSize,
clustIn = clustIn,
sideRank = MedRank) %>%
filter(MedRank <= clustIn) %>%
arrange(MedRank)
df_bestL <- df_ml %>%
filter(side == "Lateral") %>%
mutate(MedRank = NA,
LatRank = dense_rank(clust_unique),
TopStep = topStep,
minSize = minSize,
clustIn = clustIn,
sideRank = LatRank) %>%
filter(LatRank <= clustIn) %>%
arrange(LatRank)
max <- max(c(df_bestM$meanVD[1], df_bestL$meanVD[1]))
min <- min(c(df_bestM$meanVD[1], df_bestL$meanVD[1]))
#would be nice to remake using dplyr::case_when()
if (is.na(max)) {
print(paste("NA", as.character(topStep)))
topStep <- topStep + topBy
next
} else {
if (max >= 13) {
if (min < 13) {
if (df_bestM$dorsalRating[1] >= 2) {
#OR has dorsal info, constrain both med/lat to have dorsal glom
if (df_bestM$meanVD[1] == max) {
#Medial was dorsal, find dorsal lateral glom and viceversa
df_bestM <- df_bestM %>%
filter(MedRank == 1) %>% mutate(test = "1m")
df_bestL <- df_bestL %>%
filter(meanVD >= 13) %>%
filter(LatRank == min(LatRank)) %>% mutate(test = "1m")
} else if (df_bestL$meanVD[1] == max) {
df_bestL <- df_bestL %>%
filter(LatRank == 1) %>% mutate(test = "1l")
df_bestM <- df_bestM %>%
filter(meanVD >= 13) %>%
filter(MedRank == min(MedRank)) %>% mutate(test = "1l")
} #endif df_bestM$meanVD[1] == max
} else if (df_bestM$dorsalRating[1] == 1) {
#OR has mixed info, return best
df_bestM <- df_bestM %>% filter(MedRank == 1) %>% mutate(test = "2")
df_bestL <- df_bestL %>% filter(LatRank == 1) %>% mutate(test = "2")
} else {
#OR has ventral info, constrain both med/lat to have ventral glom
if (df_bestM$meanVD[1] == min) {
#Medial was ventral, find ventral ateral glom and viceversa
df_bestM <- df_bestM %>%
filter(MedRank == 1) %>% mutate(test = "3m")
df_bestL <- df_bestL %>%
filter(meanVD < 13) %>%
filter(LatRank == min(LatRank)) %>% mutate(test = "3m")
} else if (df_bestL$meanVD[1] == min) {
df_bestL <- df_bestL %>%
filter(LatRank == 1) %>% mutate(test = "3l")
df_bestM <- df_bestM %>%
filter(meanVD < 13) %>%
filter(MedRank == min(MedRank)) %>% mutate(test = "3l")
} #endif (df_bestM$meanVD[1] == min)
} #endif (df_bestM$dorsalRating[1] >= 2)
} #endif (min < 13)
} else {
#both gloms are ventral, check if dorsal info
if (df_bestM$dorsalRating[1] >= 2) {
#both gloms are ventral, but OR has dorsal info, constrain to dorsal
df_bestM <- df_bestM %>%
filter(meanVD >= 13) %>%
filter(MedRank == min(MedRank)) %>% mutate(test = "4")
df_bestL <- df_bestL %>%
filter(meanVD >= 13) %>%
filter(LatRank == min(LatRank)) %>% mutate(test = "4")
} else {
df_bestM <- df_bestM %>% filter(MedRank == 1) %>% mutate(test = "5")
df_bestL <- df_bestL %>% filter(LatRank == 1) %>% mutate(test = "5")
} #endif (df_bestM$dorsalRating[1] >= 2)
} #endif (max >= 13)
} #endif (is.nax(max))
df_bestM <- df_bestM %>% filter(MedRank == min(MedRank)) %>% mutate(test = "suck")
df_bestL <- df_bestL %>% filter(LatRank == min(LatRank)) %>% mutate(test = "suck")
clust_bestM <- unique(df_bestM$clust_unique)
clust_bestL <- unique(df_bestL$clust_unique)
#checks and counters
clustFound <- min(c(length(clust_bestM), length(clust_bestL)))
print(topStep)
topStep <- topStep + topBy
clustIn <- round(clustIn * 1.5)
} #endwhile
clust_bestML <- c(clust_bestM, clust_bestL)
which_bestML <- df_in[-which(df_ml$clust_unique %in% clust_bestML),]
df_best <- bind_rows(df_bestM, df_bestL) %>% unique()
df_notbest <- df_in %>%
mutate(sideRank = NA) %>%
select(-clust_unique) %>%
mutate(clust_unique = NA) %>%
unique()
df_all <- bind_rows(df_best, df_notbest) %>% unique()
#output
if (chooseOut == "data") {
return(df_all)
} else if (chooseOut == "best") {
return(df_best)
} else if (chooseOut == "notbest") {
return(df_notbest)
} else {
p <- plot_ly(type = "scatter3d", mode = "markers") %>%
add_trace(data=df_notbest, x=~AntPos, y=~MedLat, z=~VenDor,
color="shell", opacity=0.15,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_p50:', clustmeanp,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique),
marker = list(size = 6)) %>%
add_trace(data=df_best, x=~AntPos, y=~MedLat, z=~VenDor, color=~clust_unique,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_ML:', meanML,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique,
'<br>SideRank:', sideRank),
marker = list(size = 6, line = list(color = 'black', width = 0.5))) %>%
layout(title = title,
scene = list(xaxis = list(title = 'Anterior-Posterior'),
yaxis = list(title = 'Medial-Lateral'),
zaxis = list(title = 'Ventral-Dorsal')))
return(p)
} #endif
} #endfunction
#given a list of Olfr names, output 1 medial and 1 lateral cluster for each name
ListDorML <- function(x, chooseOut = "plot", title = NA) {
#use a list to build a df of unknown size instead of bind_row each iteration
list_out <- vector("list", length = length(x))
for (i in 1:length(x)) {
list_out[[i]] <- DorsalML(x[i], topBy = 200, chooseOut = "best")
} #endfor
#always need notbest for the shape shell1
notbest <- DorsalML(x[1], chooseOut = "notbest")
df_out <- bind_rows(list_out)
#output
if (chooseOut == "data") {
return(df_out)
} else if (chooseOut == "point") {
df_point <- df_out %>% filter(p50 == clustmaxp)
p <- plot_ly(type = "scatter3d", mode = "markers") %>%
add_trace(data=notbest, x=~AntPos, y=~MedLat, z=~VenDor,
color="shell", opacity=0.15,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_p50:', clustmeanp,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique),
marker = list(size = 6)) %>%
add_trace(data=df_point, x=~AntPos, y=~MedLat, z=~VenDor, color=~olfrname,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_ML:', meanML,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique,
'<br>SideRank:', sideRank),
marker = list(size = 6, line = list(color = 'black', width = 0.5))) %>%
layout(title = title,
scene = list(xaxis = list(title = 'Anterior-Posterior'),
yaxis = list(title = 'Medial-Lateral'),
zaxis = list(title = 'Ventral-Dorsal')))
return(p)
} else {
p <- plot_ly(type = "scatter3d", mode = "markers") %>%
add_trace(data=notbest, x=~AntPos, y=~MedLat, z=~VenDor,
color="shell", opacity=0.15,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_p50:', clustmeanp,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique),
marker = list(size = 6)) %>%
add_trace(data=df_out, x=~AntPos, y=~MedLat, z=~VenDor, color=~olfrname,
text = ~paste('Gene:', olfrname,
'<br>C_size_rank:', clustsizerank,
'<br>C_mean_ML:', meanML,
'<br>C_max_p50:', clustmaxp,
'<br>Cluster:', clust_unique,
'<br>SideRank:', sideRank),
marker = list(size = 6, line = list(color = 'black', width = 0.5))) %>%
layout(title = title,
scene = list(xaxis = list(title = 'Anterior-Posterior'),
yaxis = list(title = 'Medial-Lateral'),
zaxis = list(title = 'Ventral-Dorsal')))
return(p)
} #endif
} #endfunction
#load data
allmice_tpm <- read_csv("~/Desktop/obmap/r_analysis/3dimOB/input/covarintactchemo_over_samples_200923.csv") %>% select(-X1)
## Warning: Missing column names filled in: 'X1' [1]
allmice_covars <- read_csv("~/Desktop/obmap/r_analysis/3dimOB/input/allmice_covariates_trim_voxweights_v3.csv")
symline <- read_csv("~/Desktop/obmap/r_analysis/3dimOB/input/symline.csv")
good_3dpoint <- read_csv("~/Desktop/obmap/r_analysis/3dimOB/output/goodpointORs972_allchemo_listdorML_200820.csv")
info <- read_csv("~/Desktop/obmap/r_analysis/3dimOB/input/knowntanwavgFI.csv") %>%
rename("olfrname" = "gene") %>%
select(olfrname:RTP, known, lowTPM, fisurface)
set.seed(711)
functions loaded from v21_gen25.Rmd
kzY <- allmice_tpm %>% filter(dimrep == 1 | dimrep == 2)
ectopic <- c("Olfr287","Olfr32")
kzYgood <- dplyr::select(kzY, -ectopic) %>% dplyr::select(-name, -rep, -slice, -dim, -dimrep)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(ectopic)` instead of `ectopic` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
kzmY <- as.matrix(kzYgood)
rownames(kzmY) <- paste0("sample", 1:dim(kzY)[1])
#rdirichlet requires vector not dataframe so matrix the tibble
kzX <- read_csv("~/Desktop/obmap/r_analysis/3dimOB/input/allmice_covariates_trim_voxweights_v3.csv") %>% filter(name %in% kzY$name)
## Parsed with column specification:
## cols(
## name = col_character(),
## rep = col_double(),
## slice = col_double(),
## dim = col_character(),
## count = col_double(),
## weight = col_double(),
## glomarea.apOMP.mlvdseb = col_double(),
## totalarea.seb = col_double(),
## count.clear = col_double(),
## count.unclear = col_double(),
## count.total = col_double(),
## glom.seb = col_double(),
## glom.kz = col_double()
## )
N <- dim(kzmY)[1] #number of sections
D <- dim(kzmY)[2] #number of genes
# Voxel System
d1 <- 23 #antpost
d2 <- 22 #medlat
d3 <- 23 #vendor
#load voxels from bulb_in_cube
# voxalls_test <- read_csv("~/Desktop/obmap/r_analysis/3dimOB/input/blankOBcoords200922.csv") %>% mutate(type = "latfill") #incorporating fill for 1 voxel thick layer on most lateral surface
# voxalls_0 <- readRDS("~/Desktop/obmap/r_analysis/3dimOB/input/191010_outer_inner_coords_ml23nohole.RDS")
# voxalls_0 <- bind_rows(voxalls_0, voxalls_test)
# voxalls_1 <- voxalls_0[-which(duplicated(voxalls_0)),]
# vox_ap <- voxalls_1[which(voxalls_1$AntPos <= d1),]
# vox_ml <- vox_ap[which(vox_ap$MedLat <= d2),]
# vox_vd <- vox_ml[which(vox_ml$VenDor <= d3),]
# voxalls <- vox_vd %>% select(-type) %>% unique()
voxalls <- read_csv("~/Desktop/obmap/r_analysis/3dimOB/input/voxalls_200924.csv")
## Parsed with column specification:
## cols(
## AntPos = col_double(),
## MedLat = col_double(),
## VenDor = col_double()
## )
tidy_result <- readRDS("~/Desktop/obmap/r_analysis/3dimOB/tidyresults/200i_allchemo_dimrep12_allvox.RDS")
#figure out number of rows in tidy_result corresponding to each Olfr
#repeat olfrname for each voxel, total rows/1100 (#ofORs) returns voxel #
ranked <- tidy_result %>%
arrange(gene) %>%
mutate(olfrname = rep(colnames(kzmY), each=max(tidy_result$voxel))) %>%
group_by(olfrname) %>%
mutate(voxrankperOR = rank(desc(p50))) %>%
ungroup() %>%
group_by(voxel) %>%
mutate(ORrankpervox = rank(desc(p50))) %>%
ungroup() %>% group_by(gene) %>%
mutate(allVoxAvgp50 = mean(p50)) %>%
ungroup() %>% group_by(gene, AntPos) %>%
mutate(AP_Avgp50 = mean(p50), AP_SNR = p50/AP_Avgp50) %>%
ungroup() %>% group_by(gene, MedLat) %>%
mutate(ML_Avgp50 = mean(p50), ML_SNR = p50/ML_Avgp50) %>%
ungroup() %>% group_by(gene, VenDor) %>%
mutate(VD_Avgp50 = mean(p50), VD_SNR = p50/VD_Avgp50) %>%
ungroup() %>%
mutate(voxSNRdim = (AP_SNR + ML_SNR + VD_SNR)/3) %>%
group_by(gene) %>%
mutate(geneRankSNR = rank(desc(voxSNRdim))) %>%
ungroup() %>% group_by(voxel) %>%
mutate(voxRankSNR = rank(desc(voxSNRdim))) %>%
ungroup() %>%
select(p2.5, p50, p97.5, AntPos:ORrankpervox, geneRankSNR,
voxRankSNR, AP_Avgp50:voxSNRdim, voxel)
allmice_tpm <- allmice_tpm[,1:1121]
#calculate average TPM of an OR across all sections
or_mean_all <- vector(length = ncol(allmice_tpm), mode = "numeric")
for (i in 1:ncol(allmice_tpm)) {
if (i < 6) {
or_mean_all[i] <- NA
} else {
or_mean_all[i] <- sum(allmice_tpm[,i])/nrow(allmice_tpm)
}
}
#plot how many ORs below each TPM cutoff
ors_below <- vector(length = 100, mode = "numeric")
below_num <- vector(length = 100, mode = "numeric")
for (i in 1:100) {
ors_below[i] <- length(which(or_mean_all < 2*i))
below_num[i] <- 2*i
}
ors_below_num <- data.frame(below_num, ors_below)
ggplot(ors_below_num, aes(below_num, ors_below)) +
geom_point()
#using 24 to start as it falls early on the curve and removes 350 ORs
alldim_ap_tpm_cut <- allmice_tpm[,-which(or_mean_all < 10)] %>% filter(dim == "AntPos")
alldim_ml_tpm_cut <- allmice_tpm[,-which(or_mean_all < 10)] %>% filter(dim == "MedLat")
alldim_vd_tpm_cut <- allmice_tpm[,-which(or_mean_all < 10)] %>% filter(dim == "VenDor")
ap_tpm <- allmice_tpm %>% filter(dim == "AntPos")
#calculate average TPM of an OR across all sections
or_mean <- vector(length = ncol(ap_tpm), mode = "numeric")
for (i in 1:ncol(ap_tpm)) {
if (i < 6) {
or_mean[i] <- NA
} else {
or_mean[i] <- sum(ap_tpm[,i])/nrow(ap_tpm)
}
}
#plot how many ORs below each TPM cutoff
ors_below <- vector(length = 100, mode = "numeric")
below_num <- vector(length = 100, mode = "numeric")
for (i in 1:100) {
ors_below[i] <- length(which(or_mean < 2*i))
below_num[i] <- 2*i
}
ors_below_num <- data.frame(below_num, ors_below)
ggplot(ors_below_num, aes(below_num, ors_below)) +
geom_point()
#using 24 to start as it falls early on the curve and removes 350 ORs
tpm_cut <- ap_tpm[,-which(or_mean < 8)]
m4_tpm <- alldim_ap_tpm_cut %>% filter(rep == 4) %>% select(-name, -rep, -dim, -dimrep, -Olfr287, -Olfr32, -Olfr361)
m6_tpm <- alldim_ap_tpm_cut %>% filter(rep == 6) %>% select(-name, -rep, -dim, -dimrep, -Olfr287, -Olfr32, -Olfr361)
m7_tpm <- alldim_ap_tpm_cut %>% filter(rep == 7) %>% select(-name, -rep, -dim, -dimrep, -Olfr287, -Olfr32, -Olfr361)
m10_tpm <- alldim_ap_tpm_cut %>% filter(rep == 10) %>% select(-name, -rep, -dim, -dimrep, -Olfr287, -Olfr32, -Olfr361)
m13_tpm <- alldim_ap_tpm_cut %>% filter(rep == 13) %>% select(-name, -rep, -dim, -dimrep, -Olfr287, -Olfr32, -Olfr361)
m15_tpm <- alldim_ap_tpm_cut %>% filter(rep == 15) %>% select(-name, -rep, -dim, -dimrep, -Olfr287, -Olfr32, -Olfr361)
m4_norm <- NormTPM(m4_tpm)
m6_norm <- NormTPM(m6_tpm)
m7_norm <- NormTPM(m7_tpm)
m10_norm <- NormTPM(m10_tpm)
m13_norm <- NormTPM(m13_tpm)
m15_norm <- NormTPM(m15_tpm)
#hierarchical clustering of both axes
#heatmaply(m10_norm)
#heatmaply_cor(m10_norm)
#plots dont mean much without sorting, good to look at hclust of section axis however. ideally y should move sequentially, may be worth investigating sections that appear out of order for OR tpm total, # missing ORs, # high ORs, etc.
make sort for voxel adjusted TPM
ap_voxweights <- allmice_covars %>% filter(rep == 10) %>% select(slice, weight)
m4_Vnorm <- Voxnormify(m4_tpm, ap_voxweights)
m6_Vnorm <- Voxnormify(m6_tpm, ap_voxweights)
m7_Vnorm <- Voxnormify(m7_tpm, ap_voxweights)
m10_Vnorm <- Voxnormify(m10_tpm, ap_voxweights)
m13_Vnorm <- Voxnormify(m13_tpm, ap_voxweights)
m15_Vnorm <- Voxnormify(m15_tpm, ap_voxweights)
#heatmaply_cor(m10_Vnorm, Rowv=T, Colv=F)
m46710_voxsort <- Sorting_Factory4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm, method = "posmean")
m4_voxsort <- SortByList(m4_Vnorm, m46710_voxsort)
m6_voxsort <- SortByList(m6_Vnorm, m46710_voxsort)
m7_voxsort <- SortByList(m7_Vnorm, m46710_voxsort)
m10_voxsort <- SortByList(m10_Vnorm, m46710_voxsort)
m13_voxsort <- SortByList(m13_Vnorm, m46710_voxsort)
m15_voxsort <- SortByList(m15_Vnorm, m46710_voxsort)
#heatmaply_cor(m4_voxsort, Rowv = F, Colv = F)
#heatmaply_cor(m6_voxsort, Rowv = F, Colv = F)
#heatmaply_cor(m7_voxsort, Rowv = F, Colv = F)
#heatmaply_cor(m10_voxsort, Rowv = F, Colv = F)
MakeBlackRedHeatmap(m4_voxsort)
MakeBlackRedHeatmap(m6_voxsort)
MakeBlackRedHeatmap(m7_voxsort)
MakeBlackRedHeatmap(m10_voxsort)
MakeBlackRedHeatmap(m13_voxsort)
MakeBlackRedHeatmap(m15_voxsort)
#examine mergeDF used to make sort
m46710mergeV <- normNormDF4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm)
m46710mergeV_voxnormsort <- SortByList(m46710mergeV, m46710_voxsort)
MakeBlackRedHeatmap(m46710mergeV_voxnormsort)
apcombohm <- MakeBlackRedHeatmap(m4_voxsort) + MakeBlackRedHeatmap(m6_voxsort) + MakeBlackRedHeatmap(m7_voxsort)
vd_tpm <- allmice_tpm %>% filter(dim == "VenDor")
#calculate average TPM of an OR across all sections
vd_or_mean <- vector(length = ncol(vd_tpm), mode = "numeric")
for (i in 1:ncol(vd_tpm)) {
if (i < 6) {
vd_or_mean[i] <- NA
} else {
vd_or_mean[i] <- sum(vd_tpm[,i])/nrow(vd_tpm)
}
}
#plot how many ORs below each TPM cutoff
vd_ors_below <- vector(length = 100, mode = "numeric")
vd_below_num <- vector(length = 100, mode = "numeric")
for (i in 1:100) {
vd_ors_below[i] <- length(which(or_mean < 2*i))
vd_below_num[i] <- 2*i
}
vd_ors_below_num <- data.frame(vd_below_num, vd_ors_below)
ggplot(vd_ors_below_num, aes(vd_below_num, vd_ors_below)) +
geom_point()
#using 24 to start as it falls early on the curve and removes 350 ORs
vd_tpm_cut <- vd_tpm[,-which(vd_or_mean < 8)]
## VD normalize values
m9_tpm <- alldim_vd_tpm_cut %>% filter(rep == 9, slice <= 22) %>% select(-name, -rep, -dim, -dimrep, -Olfr287, -Olfr32, -Olfr361)
m12_tpm <- alldim_vd_tpm_cut %>% filter(rep == 12, slice <= 22) %>% select(-name, -rep, -dim, -dimrep, -Olfr287, -Olfr32, -Olfr361)
m14_tpm <- alldim_vd_tpm_cut %>% filter(rep == 14, slice <= 22) %>% select(-name, -rep, -dim, -dimrep, -Olfr287, -Olfr32, -Olfr361)
m9_norm <- NormTPM(m9_tpm)
m12_norm <- NormTPM(m12_tpm)
m14_norm <- NormTPM(m14_tpm)
#hierarchical clustering of both axes
#heatmaply(m12_norm)
#heatmaply_cor(m12_norm)
#plots dont mean much without sorting, good to look at hclust of section axis however. ideally y should move sequentially, may be worth investigating sections that appear out of order for OR tpm total, # missing ORs, # high ORs, etc.
## VD apply voxel factors
# VD make sort for voxel adjusted TPM
vd_voxweights <- allmice_covars %>% filter(rep == 9) %>% select(slice, weight)
m9_Vnorm <- Voxnormify(m9_tpm, vd_voxweights)
m12_Vnorm <- Voxnormify(m12_tpm, vd_voxweights)
m14_Vnorm <- Voxnormify(m14_tpm, vd_voxweights)
#heatmaply_cor(m9_Vnorm, Rowv=T, Colv=F)
m91214_voxsort <- Sorting_Factory3(m9_Vnorm, m12_Vnorm, m14_Vnorm, method = "posmean")
m9_voxsort <- SortByList(m9_Vnorm, m91214_voxsort)
m12_voxsort <- SortByList(m12_Vnorm, m91214_voxsort)
m14_voxsort <- SortByList(m14_Vnorm, m91214_voxsort)
#heatmaply_cor(m4_voxsort, Rowv = F, Colv = F)
#heatmaply_cor(m6_voxsort, Rowv = F, Colv = F)
#heatmaply_cor(m7_voxsort, Rowv = F, Colv = F)
#heatmaply_cor(m10_voxsort, Rowv = F, Colv = F)
MakeBlackRedHeatmap(m9_voxsort)
MakeBlackRedHeatmap(m12_voxsort)
MakeBlackRedHeatmap(m14_voxsort)
#examine mergeDF used to make sort
m91214mergeV <- normNormDF3(m9_Vnorm, m12_Vnorm, m14_Vnorm)
m91214mergeV_voxnormsort <- SortByList(m91214mergeV, m91214_voxsort)
MakeBlackRedHeatmap(m91214mergeV_voxnormsort)
vdcombohm <- MakeBlackRedHeatmap(m9_voxsort) + MakeBlackRedHeatmap(m12_voxsort) + MakeBlackRedHeatmap(m14_voxsort)
ml_tpm <- allmice_tpm %>% filter(dim == "MedLat")
#calculate average TPM of an OR across all sections
ml_or_mean <- vector(length = ncol(ml_tpm), mode = "numeric")
for (i in 1:ncol(ml_tpm)) {
if (i < 6) {
ml_or_mean[i] <- NA
} else {
ml_or_mean[i] <- sum(ml_tpm[,i])/nrow(ml_tpm)
}
}
#plot how many ORs below each TPM cutoff
ml_ors_below <- vector(length = 100, mode = "numeric")
ml_below_num <- vector(length = 100, mode = "numeric")
for (i in 1:100) {
ml_ors_below[i] <- length(which(ml_or_mean < 2*i))
ml_below_num[i] <- 2*i
}
ml_ors_below_num <- data.frame(ml_below_num, ml_ors_below)
ggplot(ml_ors_below_num, aes(ml_below_num, ml_ors_below)) +
geom_point()
#using 24 to start as it falls early on the curve and removes 350 ORs
ml_tpm_cut <- ml_tpm[,-which(ml_or_mean < 8)]
## ML normalize values
m8_tpm <- alldim_ml_tpm_cut %>% filter(rep == 8, slice <= 19) %>% select(-name, -rep, -dim, -dimrep, -Olfr287, -Olfr32) #olfr361 is NA after normalization
m11_tpm <- alldim_ml_tpm_cut %>% filter(rep == 11, slice <= 19) %>% select(-name, -rep, -dim, -dimrep, -Olfr287, -Olfr32)
m16_tpm <- alldim_ml_tpm_cut %>% filter(rep == 16, slice <= 19) %>% select(-name, -rep, -dim, -dimrep, -Olfr287, -Olfr32)
m8_norm <- NormTPM(m8_tpm)
m11_norm <- NormTPM(m11_tpm)
m16_norm <- NormTPM(m16_tpm)
m16_reorder <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11",
"12", "13", "14", "15", "16", "17", "18", "19")
m16_norm <- m16_norm[,m16_reorder]
#hierarchical clustering of both axes
#heatmaply(m16_norm)
#heatmaply_cor(m8_norm)
#plots dont mean much without sorting, good to look at hclust of section axis however. ideally y should move sequentially, may be worth investigating sections that appear out of order for OR tpm total, # missing ORs, # high ORs, etc.
## ML apply voxel factors
# ML make sort for voxel adjusted TPM
ml_voxweights <- allmice_covars %>% filter(rep == 8) %>% select(slice, weight)
m8_Vnorm <- Voxnormify(m8_tpm, ml_voxweights)
m11_Vnorm <- Voxnormify(m11_tpm, ml_voxweights)
m16_Vnorm <- Voxnormify(m16_tpm, ml_voxweights)
#heatmaply_cor(m9_Vnorm, Rowv=T, Colv=F)
m81116_voxsort <- Sorting_Factory3(m8_Vnorm, m11_Vnorm, m16_Vnorm, method = "posmean")
m8_voxsort <- SortByList(m8_Vnorm, m81116_voxsort)
m11_voxsort <- SortByList(m11_Vnorm, m81116_voxsort)
m16_voxsort <- SortByList(m16_Vnorm, m81116_voxsort)
#heatmaply_cor(m8_voxsort, Rowv = F, Colv = F)
MakeBlackRedHeatmap(m8_voxsort)
MakeBlackRedHeatmap(m11_voxsort)
MakeBlackRedHeatmap(m16_voxsort)
#examine mergeDF used to make sort
m81116mergeV <- normNormDF3(m8_Vnorm, m11_Vnorm, m16_Vnorm)
m81116mergeV_voxnormsort <- SortByList(m81116mergeV, m81116_voxsort)
MakeBlackRedHeatmap(m81116mergeV_voxnormsort)
mlcombohm <- MakeBlackRedHeatmap(m8_voxsort)+MakeBlackRedHeatmap(m11_voxsort)+MakeBlackRedHeatmap(m16_voxsort)
try sorting with kmeans clustering
voxsort_cor <- Multi_Spear_mtx(m4_voxsort, m6_voxsort, m7_voxsort, m10_voxsort)
#heatmaply_cor(voxsort_cor, Rowv=F, Colv=F)
km <- kmeans(scale(m4_voxsort), 23, iter.max = 1000)
km_sort <- tibble("gene" = names(km$cluster), "sortrank" = km$cluster)
m4_kmsort <- SortByList(m4_voxsort, km_sort)
#heatmaply_cor(m4_kmsort, Rowv=F, Colv=F)
#need to add km_sort as well as voxsort and compare, perhaps possible to determine order of km clusters from AtoP
PCA, tsne, dimred plots For each OR, create a vector holding meanposition for all replicates and all dimensions Calculate all pairwise correlations for these vectors and determine a cutoff for covariance, can also compare lowest distance by subtracting or something
alldim_m4 <- alldim_ap_tpm_cut %>% filter(rep == 4) %>% dplyr::select(-name, -rep, -dim, -dimrep)
alldim_m6 <- alldim_ap_tpm_cut %>% filter(rep == 6) %>% select(-name, -rep, -dim, -dimrep)
alldim_m7 <- alldim_ap_tpm_cut %>% filter(rep == 7) %>% select(-name, -rep, -dim, -dimrep)
alldim_m10 <- alldim_ap_tpm_cut %>% filter(rep == 10) %>% select(-name, -rep, -dim, -dimrep)
alldim_m8 <- alldim_ml_tpm_cut %>% filter(rep == 8) %>% select(-name, -rep, -dim, -dimrep)
alldim_m11 <- alldim_ml_tpm_cut %>% filter(rep == 11) %>% select(-name, -rep, -dim, -dimrep)
alldim_m16 <- alldim_ml_tpm_cut %>% filter(rep == 16) %>% select(-name, -rep, -dim, -dimrep)
alldim_m9 <- alldim_vd_tpm_cut %>% filter(rep == 9) %>% select(-name, -rep, -dim, -dimrep)
alldim_m12 <- alldim_vd_tpm_cut %>% filter(rep == 12) %>% select(-name, -rep, -dim, -dimrep)
alldim_m14 <- alldim_vd_tpm_cut %>% filter(rep == 14) %>% select(-name, -rep, -dim, -dimrep)
m4_vnorm_ad <- Voxnormify(alldim_m4, ap_voxweights)
m6_vnorm_ad <- Voxnormify(alldim_m6, ap_voxweights)
m7_vnorm_ad <- Voxnormify(alldim_m7, ap_voxweights)
m10_vnorm_ad <- Voxnormify(alldim_m10, ap_voxweights)
m8_vnorm_ad <- Voxnormify(alldim_m8, ml_voxweights)
m11_vnorm_ad <- Voxnormify(alldim_m11, ml_voxweights)
m16_vnorm_ad <- Voxnormify(alldim_m16, ml_voxweights)
m9_vnorm_ad <- Voxnormify(alldim_m9, vd_voxweights)
m12_vnorm_ad <- Voxnormify(alldim_m12, vd_voxweights)
m14_vnorm_ad <- Voxnormify(alldim_m14, vd_voxweights)
#for each mouse, for each row
dfs <- list(m4_vnorm_ad, m6_vnorm_ad, m7_vnorm_ad, m8_vnorm_ad,
m9_vnorm_ad, m10_vnorm_ad, m11_vnorm_ad, m12_vnorm_ad,
m14_vnorm_ad, m16_vnorm_ad)
#problem is dfs are different lengths, may need to use a different
meanpos_mtx <- matrix(ncol = length(dfs), nrow = nrow(dfs[[1]]))
for (i in 1:length(dfs)) {
df_norm <- dfs[[i]]
secs <- c(1:23)
wavgs <- vector(mode = "numeric", length = nrow(df_norm))
for (j in 1:nrow(df_norm)) {
vals <- df_norm[j,]
sumvals <- sum(vals)
prods <- vector(mode = "numeric", length = length(vals))
for (k in 1:length(vals)) {
prods[k] <- vals[k] * secs[k]
}
wavgs[j] <- sum(prods)/sumvals
}
meanpos_mtx[,i] <- wavgs
}
ornames <- rownames(m4_vnorm_ad)
rownames(meanpos_mtx) <- ornames
colnames(meanpos_mtx) <- c("m4_AP", "m6_AP", "m7_AP", "m8_ML", "m9_VD",
"m10_AP", "m11_ML", "m12_VD", "m14_VD", "m16_ML")
meanpos_tb <- as_tibble(meanpos_mtx) %>% mutate(gene_id = ornames)
meanpos_cor <- cor(t(meanpos_mtx))
meanpos_cov <- cov(t(meanpos_mtx))
#branch400 <- PlotBranches(400)
#saveRDS(branch400, "~/Desktop/obmap/r_analysis/heatmaps/output/branch400.RDS")
branch400 <- readRDS("~/Desktop/obmap/r_analysis/heatmaps/output/branch400.RDS")
#400 branches, only 127 have >1 OR
#300 brnaches, only 122 have > 1 OR
#200 branches, only 118 have > 1 OR
goodcor400 <- which(branch400$cor > 0.95)
gooddist400 <- which(branch400$avg_dist < 12)
goodset <- goodcor400[which(goodcor400 %in% gooddist400)]
bestcor400 <- which(branch400$cor == max(branch400$cor, na.rm=T))
branch400$heatmaps[bestcor400]
## [[1]]
bestdist400 <- which(branch400$avg_dist == min(branch400$avg_dist, na.rm=T))
branch400$`3D`[107]
## [[1]]
#find something to define the branch with the highest ranked cor that is also the highest ranked 3d
heatmaply_cor(meanpos_cor, k_row = 10, dist_method = "euclidean", hclust_method = "complete")
dend <- hclust(dist(meanpos_cor, method = "euclidean"), method = "complete")
cutree(dend, k = 10)
ash_basalpreds_top10 <- c("Olfr140","Olfr742","Olfr477","Olfr222",
"Olfr740","Olfr642","Olfr1535","Olfr288",
"Olfr743","Olfr571")
tree1 <- names(which(cutree(dend, k = 10) == 1))
tree2 <- names(which(cutree(dend, k = 10) == 2))
tree3 <- names(which(cutree(dend, k = 10) == 3))
tree4 <- names(which(cutree(dend, k = 10) == 4))
tree5 <- names(which(cutree(dend, k = 10) == 5))
tree6 <- names(which(cutree(dend, k = 10) == 6))
tree7 <- names(which(cutree(dend, k = 10) == 7))
tree8 <- names(which(cutree(dend, k = 10) == 8))
tree9 <- names(which(cutree(dend, k = 10) == 9))
tree10 <- names(which(cutree(dend, k = 10) == 10))
#AP
ap1 <- MakeBLRHtree4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm, tree1, "AP-tree1")
ap2 <- MakeBLRHtree4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm, tree2, "AP-tree2")
ap3 <- MakeBLRHtree4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm, tree3, "AP-tree3")
ap4 <- MakeBLRHtree4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm, tree4, "AP-tree4")
ap5 <- MakeBLRHtree4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm, tree5, "AP-tree5")
ap6 <- MakeBLRHtree4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm, tree6, "AP-tree6")
ap7 <- MakeBLRHtree4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm, tree7, "AP-tree7")
ap8 <- MakeBLRHtree4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm, tree8, "AP-tree8")
ap9 <- MakeBLRHtree4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm, tree9, "AP-tree9")
ap10 <- MakeBLRHtree4(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm, tree10, "AP-tree10")
#ML
ml1 <- MakeBLRHtree3(m8_Vnorm, m11_Vnorm, m16_Vnorm, tree1, "ML-tree1")
ml2 <- MakeBLRHtree3(m8_Vnorm, m11_Vnorm, m16_Vnorm, tree2, "ML-tree2")
ml3 <- MakeBLRHtree3(m8_Vnorm, m11_Vnorm, m16_Vnorm, tree3, "ML-tree3")
ml4 <- MakeBLRHtree3(m8_Vnorm, m11_Vnorm, m16_Vnorm, tree4, "ML-tree4")
ml5 <- MakeBLRHtree3(m8_Vnorm, m11_Vnorm, m16_Vnorm, tree5, "ML-tree5")
ml6 <- MakeBLRHtree3(m8_Vnorm, m11_Vnorm, m16_Vnorm, tree6, "ML-tree6")
ml7 <- MakeBLRHtree3(m8_Vnorm, m11_Vnorm, m16_Vnorm, tree7, "ML-tree7")
ml8 <- MakeBLRHtree3(m8_Vnorm, m11_Vnorm, m16_Vnorm, tree8, "ML-tree8")
ml9 <- MakeBLRHtree3(m8_Vnorm, m11_Vnorm, m16_Vnorm, tree9, "ML-tree9")
ml10 <- MakeBLRHtree3(m8_Vnorm, m11_Vnorm, m16_Vnorm, tree10, "ML-tree10")
#VD
vd1 <- MakeBLRHtree3(m9_Vnorm, m12_Vnorm, m14_Vnorm, tree1, "VD-tree1")
vd2 <- MakeBLRHtree3(m9_Vnorm, m12_Vnorm, m14_Vnorm, tree2, "VD-tree2")
vd3 <- MakeBLRHtree3(m9_Vnorm, m12_Vnorm, m14_Vnorm, tree3, "VD-tree3")
vd4 <- MakeBLRHtree3(m9_Vnorm, m12_Vnorm, m14_Vnorm, tree4, "VD-tree4")
vd5 <- MakeBLRHtree3(m9_Vnorm, m12_Vnorm, m14_Vnorm, tree5, "VD-tree5")
vd6 <- MakeBLRHtree3(m9_Vnorm, m12_Vnorm, m14_Vnorm, tree6, "VD-tree6")
vd7 <- MakeBLRHtree3(m9_Vnorm, m12_Vnorm, m14_Vnorm, tree7, "VD-tree7")
vd8 <- MakeBLRHtree3(m9_Vnorm, m12_Vnorm, m14_Vnorm, tree8, "VD-tree8")
vd9 <- MakeBLRHtree3(m9_Vnorm, m12_Vnorm, m14_Vnorm, tree9, "VD-tree9")
vd10 <- MakeBLRHtree3(m9_Vnorm, m12_Vnorm, m14_Vnorm, tree10, "VD-tree10")
ap1 + ml1 + vd1
ap2 + ml2 + vd2
ap3 + ml3 + vd3
ap4 + ml4 + vd4
ap5 + ml5 + vd5
ap6 + ml6 + vd6
ap7 + ml7 + vd7
ap8 + ml8 + vd8
ap9 + ml9 + vd9
ap10 + ml10 + vd10
length(tree1)
length(tree2)
length(tree3)
length(tree4)
length(tree5)
length(tree6)
length(tree7)
length(tree8)
length(tree9)
length(tree10)
#removing ectopics from the treesets
#tree7 <- tree7[-which(tree7 == "Olfr287")]
#tree7 <- tree7[-which(tree7 == "Olfr32")]
#
#ListDorML(tree1, "point")
# Plot_listML(tree2, "point")
# Plot_listML(tree3, "point")
# Plot_listML(tree4, "point")
# Plot_listML(tree5, "point")
# Plot_listML(tree6, "point")
# Plot_listML(tree7, "point")
# Plot_listML(tree8, "point")
# Plot_listML(tree9, "point")
# Plot_listML(tree10, "point")
ap2
meanpos_dist <- matrix(nrow = nrow(meanpos_mtx), ncol = nrow(meanpos_mtx))
for (i in 1:nrow(meanpos_mtx)) {
for (j in 1:nrow(meanpos_mtx)) {
meanpos_dist[i,j] <- sum(abs(meanpos_mtx[i,] - meanpos_mtx[j,]))/10
}
}
#are high correlation pairs close in distance?
sd(meanpos_cor)
bestcor <- which(meanpos_cor > 0.6)
sd(meanpos_dist)
bestdistmean <- which(meanpos_dist < 2.5)
bestdistmean[which(bestdistmean < 783)]
bdm_rank <- matrix(nrow = nrow(meanpos_dist), ncol = ncol(meanpos_dist))
for (i in 1:nrow(meanpos_dist)) {
bdm_rank[i,] <- min_rank(meanpos_dist[i,])
}
which(bdm_rank == 2)
rownames(bdm_rank) <- ornames
colnames(bdm_rank) <- ornames
#should we group by dim, determine avgpos of dim, and then compare distance/correlation?
ap_meanpos <- meanpos_tb %>% select(gene_id, m4_AP, m6_AP, m7_AP, m10_AP) %>% mutate(mean_AP = (m4_AP + m6_AP + m7_AP + m10_AP)/4)
vd_meanpos <- meanpos_tb %>% select(gene_id, m9_VD, m12_VD, m14_VD) %>% mutate(mean_VD = (m9_VD + m12_VD + m14_VD)/3)
#excluding m16 due to poor correlation with m8 and m11
#cor(ml_meanpos$m8_ML, ml_meanpos$m16_ML) = -0.2465, -.409 for m11vsm16
ml_meanpos <- meanpos_tb %>% select(gene_id, m8_ML, m11_ML) %>% mutate(mean811_ML = (m8_ML + m11_ML)/2)
dimmeanpos <- data.frame(ap_meanpos$mean_AP, vd_meanpos$mean_VD, ml_meanpos$mean811_ML) %>% data.matrix
dimmeanpos_tb <- ap_meanpos %>% select(gene_id, mean_AP) %>% mutate(mean_VD = vd_meanpos$mean_VD, mean_ML = ml_meanpos$mean811_ML)
#plot as 3D scatter
plot_ly(type = "scatter3d", mode = "markers") %>%
add_trace(data=dimmeanpos_tb, x= ~mean_AP, y= ~mean_ML, z= ~mean_VD,
text = ~paste('Gene: ', gene_id,
'<br>AP:', mean_AP,
'<br>ML:', mean_ML,
'<br>VD:', mean_VD),
marker = list(size = 6, line = list(color = 'black', width = 0.5))) %>%
layout(scene = list(xaxis = list(title = 'Anterior-Posterior'),
yaxis = list(title = 'Medial-Lateral'),
zaxis = list(title = 'Ventral-Dorsal')))
dim_dist <- matrix(nrow = nrow(dimmeanpos), ncol = nrow(dimmeanpos))
for (i in 1:nrow(dimmeanpos)) {
for (j in 1:nrow(dimmeanpos)) {
dim_dist[i,j] <- sum(abs(dimmeanpos[i,] - dimmeanpos[j,]))/3
}
}
#using the distance between dimensional mean positions, define clusters
#for each OR, rank 1 = least distance
dd_rank <- matrix(nrow = nrow(dim_dist), ncol = nrow(dim_dist))
for (i in 1:nrow(dim_dist)) {
dd_rank[i,] <- min_rank(dim_dist[i,])
}
#pick 2 to 6 since the diagonal is 0 which becomes rank 1
top5 <- which(between(x = dd_rank, left = 2, right = 6))
dd_bin <- matrix(nrow = nrow(dd_rank), ncol = nrow(dd_rank), data = 0)
dd_bin[top5] <- 1
#cluster matrix
cluster_list <- rep(NA, nrow(dd_bin))
for (k in 1:ncol(dd_bin)) {
#skip points that are already in a cluster
if (is.na(cluster_list[k])) {
round <- 1
#do a bunch of rounds, find a way to have it run until it stops finding new points
while (round < 3) {
neigh <- which(dd_bin[,k] == 1)
#for each neighbor of previous round of neighbors, find new neighbors
for (l in 1:length(neigh)) {
neigh <- c(neigh, which(dd_bin[,neigh[l]] == 1))
neigh <- neigh[-which(duplicated(neigh))]
neigh <- sort(neigh)
} #endforl
round <- round + 1
} #endwhile
cluster_list[neigh] <- k
} else {
next
} #endif
} #endfork
#umap stuff
greekinfo <- read_csv("~/Desktop/obmap/r_analysis/inputs/ORs_genomicFeatures_mmusGRCm38.99.csv") %>% unique()
## Parsed with column specification:
## cols(
## .default = col_double(),
## olfrname = col_character(),
## greek_name = col_character(),
## greek_score = col_character(),
## closest_greek = col_character(),
## tf_ebf = col_logical(),
## tf_lhx2 = col_logical(),
## tf_h3k9me2 = col_logical(),
## OR_TSS_within500 = col_logical(),
## ensembl_gene = col_character()
## )
## See spec(...) for full column specifications.
umap_meanpos <- umap(meanpos_mtx, n_neighbors = 30, learning_rate = 0.5, init = "spca") %>%
as_tibble %>%
mutate(olfrname = rownames(meanpos_mtx)) %>%
left_join(good_3dpoint, by = "olfrname") %>%
rename("UMAP_1" = V1, "UMAP_2" = V2) %>% filter(!is.na(AntPos)) %>%
mutate(dimdv = ifelse(VenDor >= 13,
"Dorsal",
"Ventral"),
dimml = ifelse(MedLat > 17,
"Lateral",
ifelse(MedLat < 5,
"Medial",
NA)),
dim = ifelse(is.na(dimml), dimdv, dimml)) %>%
select(olfrname, everything()) %>%
rowwise() %>%
mutate(OlfrNum = as.numeric(str_split(olfrname, "r")[[1]][2])) %>%
ungroup() %>%
left_join(greekinfo, by = "olfrname")
#write_csv(umap_meanpos, "~/Desktop/obmap/r_analysis/heatmaps_starrsem/obmap_heatmaps_gen25_v200818/umap_meanpos_200831.csv")
ggplot(umap_meanpos) +
geom_point(aes(UMAP_1, UMAP_2, color = class_fct)) + ggtitle("Class")
ggplot(umap_meanpos) +
geom_point(aes(UMAP_1, UMAP_2, color = oe_region)) + ggtitle("oe_region")
ggplot(umap_meanpos) +
geom_point(aes(UMAP_1, UMAP_2, color = OlfrNum)) + ggtitle("Gene name #")
ggplot(umap_meanpos) +
geom_point(aes(UMAP_1, UMAP_2, color = cluster_id)) + ggtitle("ClusterID")
ggplot(umap_meanpos) +
geom_point(aes(UMAP_1, UMAP_2, color = closest_greek)) + ggtitle("Closest Greek")
ggplot(umap_meanpos) +
geom_point(aes(UMAP_1, UMAP_2, color = chromosome)) + ggtitle("chr")
appp_vnorm <- MultiAPPP(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm, "_m4", "_m6", "_m7", "_m10")
forcomboant <- appp_vnorm %>% select(antpeak_m4, antpeak_m6) %>%
rename(combo4 = antpeak_m4, combo6 = antpeak_m6)
forcombopost <- appp_vnorm %>% select(postpeak_m4, postpeak_m6) %>%
rename(combo4 = postpeak_m4, combo6 = postpeak_m6)
forcombo <- bind_rows(forcomboant, forcombopost)
#plot ant vs ant, post vs post
ggplot(appp_vnorm) +
geom_jitter(aes(antpeak_m4, antpeak_m6), color = "red", alpha = 0.4) +
geom_jitter(aes(postpeak_m4, postpeak_m6), color = "blue", alpha = 0.4) +
geom_abline(slope = 1) +
geom_smooth(data = forcombo, aes(combo4, combo6), color = "purple", method = "lm", se=F)
## `geom_smooth()` using formula 'y ~ x'
# #above as boxes or violins
# ggplot(appp_vnorm %>% mutate(apm4 = antpeak_m4 + 100, ppm4 = postpeak_m4 + 100)) +
# geom_boxplot(aes(as.character(apm4), antpeak_m6), color = "red", alpha = 0.3) +
# geom_boxplot(aes(as.character(ppm4), postpeak_m6), color = "blue", alpha = 0.3)
#
# #above as faceted for ant and post
# m46facet_a <- appp_vnorm %>% select(gene, antpeak_m4, antpeak_m6) %>%
# mutate(ap = "ant") %>%
# rename(peak_4 = antpeak_m4, peak_6 = antpeak_m6)
# m46facet_p <- appp_vnorm %>% select(gene, postpeak_m4, postpeak_m6) %>%
# mutate(ap = "post") %>%
# rename(peak_4 = postpeak_m4, peak_6 = postpeak_m6)
# m46facet <- bind_rows(m46facet_a, m46facet_p)
#
# ggplot(m46facet) +
# geom_boxplot(aes(as.character(peak_4 + 100), peak_6)) +
# facet_grid(ap ~ .)
# examine distances for ant and post peaks between mice
# calculate distances
appp_vnorm_dist <- appp_vnorm %>% mutate(antdist_46 = abs(antpeak_m4 - antpeak_m6),
antdist_47 = abs(antpeak_m4 - antpeak_m7),
antdist_410 = abs(antpeak_m4 - antpeak_m10),
antdist_67 = abs(antpeak_m6 - antpeak_m7),
antdist_610 = abs(antpeak_m6 - antpeak_m10),
antdist_710 = abs(antpeak_m7 - antpeak_m10),
antdistmean_46710 = (antdist_46 + antdist_47 +
antdist_410 + antdist_67 +
antdist_610 + antdist_710)/6,
postdist_46 = abs(postpeak_m4 - postpeak_m6),
postdist_47 = abs(postpeak_m4 - postpeak_m7),
postdist_410 = abs(postpeak_m4 - postpeak_m10),
postdist_67 = abs(postpeak_m6 - postpeak_m7),
postdist_610 = abs(postpeak_m6 - postpeak_m10),
postdist_710 = abs(postpeak_m7 - postpeak_m10),
postdistmean_46710 = (postdist_46 + postdist_47 +
postdist_410 + postdist_67 +
postdist_610 + postdist_710)/6,
aptotal_46 = antdist_46 + postdist_46,
aptotal_47 = antdist_47 + postdist_47,
aptotal_410 = antdist_410 + postdist_410,
aptotal_67 = antdist_67 + postdist_67,
aptotal_610 = antdist_610 + postdist_610,
aptotal_710 = antdist_710 + postdist_710,
aptotal_mean = (aptotal_46 + aptotal_47 +
aptotal_410 + aptotal_67 +
aptotal_610 + aptotal_710)/6,
apmean_46 = (antdist_46 + postdist_46)/2,
apmean_47 = (antdist_47 + postdist_47)/2,
apmean_410 = (antdist_410 + postdist_410)/2,
apmean_67 = (antdist_67 + postdist_67)/2,
apmean_610 = (antdist_610 + postdist_610)/2,
apmean_710 = (antdist_710 + postdist_710)/2,
apmean_mean = (apmean_46 + apmean_47 +
apmean_410 + apmean_67 +
apmean_610 + apmean_710)/6)
#test correlation
cor(appp_vnorm$antpeak_m4, appp_vnorm$antpeak_m6, method = "spearman")
## [1] 0.6261142
#how many ORs have identical distance between ant and post peak (could be diff directions thou)
length(which(appp_vnorm_dist$antdist_46 == appp_vnorm_dist$postdist_46))
## [1] 155
#plot distance between ant peaks for m4 and m6
#for these two mice, most ORs have anterior peaks that are within 2 sections
ggplot(appp_vnorm_dist) +
geom_histogram(aes(antdist_46), fill = "red")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#plot distance between post peaks for m4 and m6
#for these two mice, most ORs have posterior peaks that are within 2.5 sections
ggplot(appp_vnorm_dist) +
geom_histogram(aes(postdist_46), fill = "blue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#plot mean distance between all mice for all ant and all post peaks for each OR
#across all mice, most ORs have anterior and posterior peaks within 5 sections
ggplot(appp_vnorm_dist) +
geom_histogram(aes(apmean_mean))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#plot ant vs post distance for m4 vs m6
#for these two mice, most ORs have anterior and posterior peaks within 2 sections
#46 = 2
#47 = 4
#410 = 4
#67 = 3
#610 = 4
#710 = 4
ggplot(appp_vnorm_dist %>% group_by(antdist_46, postdist_46) %>% mutate(count = n())) +
geom_point(aes(antdist_46, postdist_46, size = count), alpha = 0.25) +
geom_smooth(aes(antdist_46, postdist_46))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#plot ant vs post distance mean for all mice
ggplot(appp_vnorm_dist %>% group_by(antdistmean_46710, postdistmean_46710) %>% mutate(count = n())) +
geom_point(aes(antdistmean_46710, postdistmean_46710, size = count), alpha = 0.25) +
geom_smooth(aes(antdistmean_46710, postdistmean_46710))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Should be slanted across the OB moving more laterally as it moves more posterior
#hmm im not really seeing what i expect
#basically says an ML plane ~13 is the symmetry line
#SymLiner(m46710mergeV_voxnormsort, m91214mergeV_voxnormsort, m81116mergeV_voxnormsort, out="plot")
#lets try a single replicate
dr1 <- SymLiner(m10_Vnorm, m9_Vnorm, m8_Vnorm)
dr2 <- SymLiner(m13_Vnorm, m12_Vnorm, m11_Vnorm)
dr3 <- SymLiner(m15_Vnorm, m14_Vnorm, m16_Vnorm) #ml for m16 is flipped so actually lm
fit1 <- SymLiner(m10_Vnorm, m9_Vnorm, m8_Vnorm, out = "fit")
fit2 <- SymLiner(m13_Vnorm, m12_Vnorm, m11_Vnorm, out = "fit")
fit3 <- SymLiner(m15_Vnorm, m14_Vnorm, m16_Vnorm, out = "fit")
ggplot() +
geom_blank() +
geom_abline(aes(slope = 0.32655, intercept = 7.61119, color = "rep1")) +
geom_abline(aes(slope = 0.24922, intercept = 8.83890, color = "rep2")) +
geom_abline(aes(slope = 0.287885, intercept = 8.225045, color = "mean")) +
xlim(0,23) +
ylim(0,22) +
ggtitle("Glomeruli symmetry line as calculated by single dimension heatmap peaks") +
xlab("Mean position of top 2 AP peaks") +
ylab("Mean position of top 2 ML peaks")
apvals <- c(1:28)
mlvals <- vector(length = length(apvals), mode = "numeric")
for (i in 1:length(apvals)) {
mlvals[i] <- round(8.225045 + 0.287885 * apvals[i], digits = 2)
}
symline <- tibble(apvals, mlvals)
write_csv(symline, "~/Desktop/obmap/r_analysis/heatmaps/output/symline.csv")
#for dr1: m10 and m8, goes from 8.83 to 14.3 while AP moves 1->23 (slope = 0.24922)
#for dr2: m11 and m13, ML symmetry line goes from 7.61 to 14.8 while AP moves 1->23 (slope = 0.32655)
#i think the ml and ap ones are trimmed to fit multiple replicates, may need to use original to see if any differences in symline arise
#test other indiv replicates as well as all ap vs all ml
#export results for v20 model
#perhaps I should not be using mice averaged peaks and instead do AP1 vs ML1 etc.
#or i could be discounting ORs whose glomeruli lie very close the symmetry line in the ML dim since AntPeakPostPeak function has a minimum distance of separation
#m46710_voxsort is AP
#m81116_voxsort is ML
#m91214_voxsort is VD
ap_feats <- PlotFeatures(m46710_voxsort, title = "AP Features")
ml_feats <- PlotFeatures(m81116_voxsort, title = "ML Features")
vd_feats <- PlotFeatures(m91214_voxsort, title = "VD Features")
ap_feats
vd_feats
ml_feats
#FIsurface ORs
ap_fi <- PlotFeatures(m46710_voxsort, title = "AP Features", featureOut = "fi")
ml_fi <- PlotFeatures(m81116_voxsort, title = "ML Features", featureOut = "fi")
vd_fi <- PlotFeatures(m91214_voxsort, title = "VD Features", featureOut = "fi")
ap_fi
vd_fi
ml_fi
DV location comes from tanindex
ap_allreps <- MultiAPPP(m4_Vnorm, m6_Vnorm, m7_Vnorm, m10_Vnorm,
"_ap4", "_ap6", "_ap7", "_ap10")
# ap_first should be more lateral
ap_first <- ap_allreps %>% select(gene, contains("ant")) %>%
rename(AntPos4 = antpeak_ap4, AntPos6 = antpeak_ap6,
AntPos7 = antpeak_ap7, AntPos10 = antpeak_ap10,
APval4 = antval_ap4, APval6 = antval_ap6,
APval7 = antval_ap7, APval10 = antval_ap10) %>%
rowwise() %>%
mutate(side = "Lateral",
AntPosAvg = mean(c(AntPos4, AntPos6, AntPos7, AntPos10))) %>%
ungroup()
# ap_second should be more medial
ap_second <- ap_allreps %>% select(gene, contains("post"))%>%
rename(AntPos4 = postpeak_ap4, AntPos6 = postpeak_ap6,
AntPos7 = postpeak_ap7, AntPos10 = postpeak_ap10,
APval4 = postval_ap4, APval6 = postval_ap6,
APval7 = postval_ap7, APval10 = postval_ap10) %>%
mutate(side = "Medial",
AntPosAvg = mean(c(AntPos4, AntPos6, AntPos7, AntPos10))) %>%
ungroup()
ml_allreps <- MultiAPPP(m8_voxsort, m11_voxsort, m16_voxsort, m13_voxsort,
"_ml8", "_ml11", "_ml16", "_ap13")
# ml_first is more medial
ml_first <- ml_allreps %>%
select(gene, antpeak_ml8, antpeak_ml11, antpeak_ml16,
postpeak_ap13, antval_ml8, antval_ml11, antval_ml16, postval_ap13) %>%
rename(MedLat8 = antpeak_ml8, MedLat11 = antpeak_ml11,
MedLat16 = antpeak_ml16, AntPos13 = postpeak_ap13,
MLval8 = antval_ml8, MLval11 = antval_ml11,
MLval16 = antval_ml16, APval13 = postval_ap13) %>%
rowwise() %>%
mutate(MedLatAvg = mean(c(MedLat8, MedLat11, MedLat16))) %>%
ungroup()
# ml_second is more lateral
ml_second <- ml_allreps %>%
select(gene, postpeak_ml8, postpeak_ml11, postpeak_ml16,
antpeak_ap13, postval_ml8, postval_ml11, postval_ml16, antval_ap13) %>%
rename(MedLat8 = postpeak_ml8, MedLat11 = postpeak_ml11,
MedLat16 = postpeak_ml16, AntPos13 = antpeak_ap13,
MLval8 = postval_ml8, MLval11 = postval_ml11,
MLval16 = postval_ml16, APval13 = antval_ap13) %>%
rowwise() %>%
mutate(MedLatAvg = mean(c(MedLat8, MedLat11, MedLat16))) %>%
ungroup()
vd_allreps <- MultiAPPP(m9_voxsort, m12_voxsort, m14_voxsort, m15_voxsort,
"_vd9", "_vd12", "_vd14", "_ap15")
vd_first <- vd_allreps %>% select(gene, contains("ant")) %>%
rename(VenDor9 = antpeak_vd9, VenDor12 = antpeak_vd12,
VenDor14 = antpeak_vd14, AntPos15 = antpeak_ap15,
VDval9 = antval_vd9, VDval12 = antval_vd12,
VDval14 = antval_vd14, APval15 = antval_ap15) %>%
rowwise() %>%
mutate(VenDorAvg = mean(c(VenDor9, VenDor12, VenDor14))) %>%
ungroup()
vd_second <- vd_allreps %>% select(gene, contains("post")) %>%
rename(VenDor9 = postpeak_vd9, VenDor12 = postpeak_vd12,
VenDor14 = postpeak_vd14, AntPos15 = postpeak_ap15,
VDval9 = postval_vd9, VDval12 = postval_vd12,
VDval14 = postval_vd14, APval15 = postval_ap15) %>%
rowwise() %>%
mutate(VenDorAvg = mean(c(VenDor9, VenDor12, VenDor14))) %>%
ungroup()
#using vd first for both
lateral_join <- left_join(ap_first, ml_second, by = "gene") %>% left_join(vd_first, by = "gene")
medial_join <- left_join(ap_second, ml_first, by = "gene") %>% left_join(vd_second, by = "gene")
heatmap_peaks <- bind_rows(lateral_join, medial_join) %>%
rename(olfrname = gene) %>%
rowwise() %>%
mutate(AntPosAvg = mean(c(AntPos4, AntPos6, AntPos7, AntPos10, AntPos13, AntPos15))) %>%
ungroup() %>%
left_join(info, by = "olfrname") %>%
arrange(olfrname) %>%
mutate(VDtz = ifelse(tzsimple <= 2, 23 - 10*(tzsimple - 1), 14 - 13*(tzsimple - 2)/3)) %>%
group_by(olfrname) %>%
mutate(VDavg = mean(c(VenDorAvg)))
write_csv(heatmap_peaks, "~/Desktop/obmap/r_analysis/heatmaps/output/heatmap_peaks.csv")
ap_vox <- m46710_voxsort %>% rename(AntPos = sortrank)
vd_vox <- m91214_voxsort %>% rename(VenDor = sortrank)
ml_vox <- m81116_voxsort %>% rename(MedLat = sortrank)
ranklist <- left_join(ap_vox, vd_vox, by = "gene")
ranklist2 <- left_join(ranklist, ml_vox, by = "gene")
write_csv(ranklist2, "~/Desktop/obmap/r_analysis/heatmaps/output/dim_ranklist_200912.csv")
#legacy heatmap stuff obmap excel norm, etc.